Actual source code: mpiaij.c
petsc-3.14.6 2021-03-30
1: #include <../src/mat/impls/aij/mpi/mpiaij.h>
2: #include <petsc/private/vecimpl.h>
3: #include <petsc/private/vecscatterimpl.h>
4: #include <petsc/private/isimpl.h>
5: #include <petscblaslapack.h>
6: #include <petscsf.h>
7: #include <petsc/private/hashmapi.h>
9: /*MC
10: MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
12: This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13: and MATMPIAIJ otherwise. As a result, for single process communicators,
14: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15: for communicators controlling multiple processes. It is recommended that you call both of
16: the above preallocation routines for simplicity.
18: Options Database Keys:
19: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
21: Developer Notes:
22: Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to use inodes when
23: enough exist.
25: Level: beginner
27: .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28: M*/
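
A minimal usage sketch of the recommendation above: calling both preallocation routines lets the same code run unchanged on one process (where the matrix is MATSEQAIJ) or on many (MATMPIAIJ). The preallocation counts 5 and 2 below are purely illustrative assumptions.

#include <petscmat.h>

int main(int argc,char **argv)
{
  Mat            A;
  PetscInt       m = 10;                       /* local rows/columns; illustrative only */
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSetFromOptions(A);CHKERRQ(ierr);                        /* honors -mat_type aij */
  ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);         /* used on a one-process communicator */
  ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);  /* used on a multi-process communicator */
  /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd(), use the matrix ... */
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}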
30: /*MC
31: MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
33: This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34: and MATMPIAIJCRL otherwise. As a result, for single process communicators,
35: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36: for communicators controlling multiple processes. It is recommended that you call both of
37: the above preallocation routines for simplicity.
39: Options Database Keys:
40: . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
42: Level: beginner
44: .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45: M*/
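
As a sketch only (not taken from this file, and assuming A is an existing Mat): besides the options-database key above, an AIJCRL matrix can also be requested directly in code, either by selecting the type before preallocation and assembly or by converting an already assembled AIJ matrix.

  MatSetType(A,MATAIJCRL);                        /* choose the type up front, then preallocate and fill */
  /* or, for an existing assembled MATAIJ matrix: */
  MatConvert(A,MATAIJCRL,MAT_INPLACE_MATRIX,&A);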
47: static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48: {
49: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
53: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54: A->boundtocpu = flg;
55: #endif
56: if (a->A) {
57: MatBindToCPU(a->A,flg);
58: }
59: if (a->B) {
60: MatBindToCPU(a->B,flg);
61: }
62: return(0);
63: }
66: PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67: {
69: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
72: if (mat->A) {
73: MatSetBlockSizes(mat->A,rbs,cbs);
74: MatSetBlockSizes(mat->B,rbs,1);
75: }
76: return(0);
77: }
79: PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80: {
81: PetscErrorCode ierr;
82: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
83: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data;
84: Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data;
85: const PetscInt *ia,*ib;
86: const MatScalar *aa,*bb;
87: PetscInt na,nb,i,j,*rows,cnt=0,n0rows;
88: PetscInt m = M->rmap->n,rstart = M->rmap->rstart;
91: *keptrows = NULL;
92: ia = a->i;
93: ib = b->i;
94: for (i=0; i<m; i++) {
95: na = ia[i+1] - ia[i];
96: nb = ib[i+1] - ib[i];
97: if (!na && !nb) {
98: cnt++;
99: goto ok1;
100: }
101: aa = a->a + ia[i];
102: for (j=0; j<na; j++) {
103: if (aa[j] != 0.0) goto ok1;
104: }
105: bb = b->a + ib[i];
106: for (j=0; j <nb; j++) {
107: if (bb[j] != 0.0) goto ok1;
108: }
109: cnt++;
110: ok1:;
111: }
112: MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));
113: if (!n0rows) return(0);
114: PetscMalloc1(M->rmap->n-cnt,&rows);
115: cnt = 0;
116: for (i=0; i<m; i++) {
117: na = ia[i+1] - ia[i];
118: nb = ib[i+1] - ib[i];
119: if (!na && !nb) continue;
120: aa = a->a + ia[i];
121: for (j=0; j<na;j++) {
122: if (aa[j] != 0.0) {
123: rows[cnt++] = rstart + i;
124: goto ok2;
125: }
126: }
127: bb = b->a + ib[i];
128: for (j=0; j<nb; j++) {
129: if (bb[j] != 0.0) {
130: rows[cnt++] = rstart + i;
131: goto ok2;
132: }
133: }
134: ok2:;
135: }
136: ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);
137: return(0);
138: }
140: PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141: {
142: PetscErrorCode ierr;
143: Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data;
144: PetscBool cong;
147: MatHasCongruentLayouts(Y,&cong);
148: if (Y->assembled && cong) {
149: MatDiagonalSet(aij->A,D,is);
150: } else {
151: MatDiagonalSet_Default(Y,D,is);
152: }
153: return(0);
154: }
156: PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157: {
158: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data;
160: PetscInt i,rstart,nrows,*rows;
163: *zrows = NULL;
164: MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);
165: MatGetOwnershipRange(M,&rstart,NULL);
166: for (i=0; i<nrows; i++) rows[i] += rstart;
167: ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);
168: return(0);
169: }
171: PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172: {
174: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
175: PetscInt i,n,*garray = aij->garray;
176: Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data;
177: Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data;
178: PetscReal *work;
181: MatGetSize(A,NULL,&n);
182: PetscCalloc1(n,&work);
183: if (type == NORM_2) {
184: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186: }
187: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189: }
190: } else if (type == NORM_1) {
191: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193: }
194: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196: }
197: } else if (type == NORM_INFINITY) {
198: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199: work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200: }
201: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202: work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203: }
205: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206: if (type == NORM_INFINITY) {
207: MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
208: } else {
209: MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
210: }
211: PetscFree(work);
212: if (type == NORM_2) {
213: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214: }
215: return(0);
216: }
218: PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219: {
220: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
221: IS sis,gis;
222: PetscErrorCode ierr;
223: const PetscInt *isis,*igis;
224: PetscInt n,*iis,nsis,ngis,rstart,i;
227: MatFindOffBlockDiagonalEntries(a->A,&sis);
228: MatFindNonzeroRows(a->B,&gis);
229: ISGetSize(gis,&ngis);
230: ISGetSize(sis,&nsis);
231: ISGetIndices(sis,&isis);
232: ISGetIndices(gis,&igis);
234: PetscMalloc1(ngis+nsis,&iis);
235: PetscArraycpy(iis,igis,ngis);
236: PetscArraycpy(iis+ngis,isis,nsis);
237: n = ngis + nsis;
238: PetscSortRemoveDupsInt(&n,iis);
239: MatGetOwnershipRange(A,&rstart,NULL);
240: for (i=0; i<n; i++) iis[i] += rstart;
241: ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);
243: ISRestoreIndices(sis,&isis);
244: ISRestoreIndices(gis,&igis);
245: ISDestroy(&sis);
246: ISDestroy(&gis);
247: return(0);
248: }
250: /*
251: Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252: MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
254: Only for square matrices
256: Used by a preconditioner, hence PETSC_EXTERN
257: */
258: PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259: {
260: PetscMPIInt rank,size;
261: PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
263: Mat mat;
264: Mat_SeqAIJ *gmata;
265: PetscMPIInt tag;
266: MPI_Status status;
267: PetscBool aij;
268: MatScalar *gmataa,*ao,*ad,*gmataarestore=NULL;
271: MPI_Comm_rank(comm,&rank);
272: MPI_Comm_size(comm,&size);
273: if (!rank) {
274: PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);
275: if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276: }
277: if (reuse == MAT_INITIAL_MATRIX) {
278: MatCreate(comm,&mat);
279: MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
280: MatGetBlockSizes(gmat,&bses[0],&bses[1]);
281: MPI_Bcast(bses,2,MPIU_INT,0,comm);
282: MatSetBlockSizes(mat,bses[0],bses[1]);
283: MatSetType(mat,MATAIJ);
284: PetscMalloc1(size+1,&rowners);
285: PetscMalloc2(m,&dlens,m,&olens);
286: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
288: rowners[0] = 0;
289: for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290: rstart = rowners[rank];
291: rend = rowners[rank+1];
292: PetscObjectGetNewTag((PetscObject)mat,&tag);
293: if (!rank) {
294: gmata = (Mat_SeqAIJ*) gmat->data;
295: /* send row lengths to all processors */
296: for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297: for (i=1; i<size; i++) {
298: MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
299: }
300: /* determine the number of diagonal and off-diagonal entries in each row */
301: PetscArrayzero(olens,m);
302: PetscCalloc1(m,&ld);
303: jj = 0;
304: for (i=0; i<m; i++) {
305: for (j=0; j<dlens[i]; j++) {
306: if (gmata->j[jj] < rstart) ld[i]++;
307: if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308: jj++;
309: }
310: }
311: /* send column indices to other processes */
312: for (i=1; i<size; i++) {
313: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314: MPI_Send(&nz,1,MPIU_INT,i,tag,comm);
315: MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);
316: }
318: /* send numerical values to other processes */
319: for (i=1; i<size; i++) {
320: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321: MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
322: }
323: gmataa = gmata->a;
324: gmataj = gmata->j;
326: } else {
327: /* receive row lengths */
328: MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);
329: /* receive column indices */
330: MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);
331: PetscMalloc2(nz,&gmataa,nz,&gmataj);
332: MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);
333: /* determine the number of diagonal and off-diagonal entries in each row */
334: PetscArrayzero(olens,m);
335: PetscCalloc1(m,&ld);
336: jj = 0;
337: for (i=0; i<m; i++) {
338: for (j=0; j<dlens[i]; j++) {
339: if (gmataj[jj] < rstart) ld[i]++;
340: if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341: jj++;
342: }
343: }
344: /* receive numerical values */
345: PetscArrayzero(gmataa,nz);
346: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
347: }
348: /* set preallocation */
349: for (i=0; i<m; i++) {
350: dlens[i] -= olens[i];
351: }
352: MatSeqAIJSetPreallocation(mat,0,dlens);
353: MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);
355: for (i=0; i<m; i++) {
356: dlens[i] += olens[i];
357: }
358: cnt = 0;
359: for (i=0; i<m; i++) {
360: row = rstart + i;
361: MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);
362: cnt += dlens[i];
363: }
364: if (rank) {
365: PetscFree2(gmataa,gmataj);
366: }
367: PetscFree2(dlens,olens);
368: PetscFree(rowners);
370: ((Mat_MPIAIJ*)(mat->data))->ld = ld;
372: *inmat = mat;
373: } else { /* column indices are already set; only need to move over numerical values from process 0 */
374: Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375: Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376: mat = *inmat;
377: PetscObjectGetNewTag((PetscObject)mat,&tag);
378: if (!rank) {
379: /* send numerical values to other processes */
380: gmata = (Mat_SeqAIJ*) gmat->data;
381: MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);
382: gmataa = gmata->a;
383: for (i=1; i<size; i++) {
384: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385: MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
386: }
387: nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388: } else {
389: /* receive numerical values from process 0 */
390: nz = Ad->nz + Ao->nz;
391: PetscMalloc1(nz,&gmataa); gmataarestore = gmataa;
392: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
393: }
394: /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395: ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396: ad = Ad->a;
397: ao = Ao->a;
398: if (mat->rmap->n) {
399: i = 0;
400: nz = ld[i]; PetscArraycpy(ao,gmataa,nz); ao += nz; gmataa += nz;
401: nz = Ad->i[i+1] - Ad->i[i]; PetscArraycpy(ad,gmataa,nz); ad += nz; gmataa += nz;
402: }
403: for (i=1; i<mat->rmap->n; i++) {
404: nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; PetscArraycpy(ao,gmataa,nz); ao += nz; gmataa += nz;
405: nz = Ad->i[i+1] - Ad->i[i]; PetscArraycpy(ad,gmataa,nz); ad += nz; gmataa += nz;
406: }
407: i--;
408: if (mat->rmap->n) {
409: nz = Ao->i[i+1] - Ao->i[i] - ld[i]; PetscArraycpy(ao,gmataa,nz);
410: }
411: if (rank) {
412: PetscFree(gmataarestore);
413: }
414: }
415: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
416: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
417: return(0);
418: }
420: /*
421: Local utility routine that creates a mapping from the global column
422: number to the local number in the off-diagonal part of the local
423: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
424: a slightly higher hash table cost; without it, it is not scalable (each processor
425: has an order-N integer array, but access is fast).
426: */
427: PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428: {
429: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
431: PetscInt n = aij->B->cmap->n,i;
434: if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435: #if defined(PETSC_USE_CTABLE)
436: PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);
437: for (i=0; i<n; i++) {
438: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);
439: }
440: #else
441: PetscCalloc1(mat->cmap->N+1,&aij->colmap);
442: PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));
443: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444: #endif
445: return(0);
446: }
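
To make the colmap convention above concrete (the numbers are hypothetical): if the off-diagonal block B has garray = {3,7,12}, the map stores the local index plus one, so global column 3 maps to 1, 7 to 2, 12 to 3, and every other global column yields 0, meaning "not a column of B". A lookup therefore subtracts one, as done later in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ(); gcol below stands for an assumed global column index:

#if defined(PETSC_USE_CTABLE)
  PetscTableFind(aij->colmap,gcol+1,&col);  /* keys were also stored shifted by one */
  col--;                                    /* col is -1 if gcol is not a column of B */
#else
  col = aij->colmap[gcol] - 1;              /* likewise, -1 means "not present" */
#endif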
448: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
449: { \
450: if (col <= lastcol1) low1 = 0; \
451: else high1 = nrow1; \
452: lastcol1 = col;\
453: while (high1-low1 > 5) { \
454: t = (low1+high1)/2; \
455: if (rp1[t] > col) high1 = t; \
456: else low1 = t; \
457: } \
458: for (_i=low1; _i<high1; _i++) { \
459: if (rp1[_i] > col) break; \
460: if (rp1[_i] == col) { \
461: if (addv == ADD_VALUES) { \
462: ap1[_i] += value; \
463: /* Not sure whether LogFlops will slow down the code */ \
464: (void)PetscLogFlops(1.0); \
465: } \
466: else ap1[_i] = value; \
467: inserted = PETSC_TRUE; \
468: goto a_noinsert; \
469: } \
470: } \
471: if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472: if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
473: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474: MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475: N = nrow1++ - 1; a->nz++; high1++; \
476: /* shift up all the later entries in this row */ \
477: PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);\
478: PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);\
479: rp1[_i] = col; \
480: ap1[_i] = value; \
481: A->nonzerostate++;\
482: a_noinsert: ; \
483: ailen[row] = nrow1; \
484: }
486: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487: { \
488: if (col <= lastcol2) low2 = 0; \
489: else high2 = nrow2; \
490: lastcol2 = col; \
491: while (high2-low2 > 5) { \
492: t = (low2+high2)/2; \
493: if (rp2[t] > col) high2 = t; \
494: else low2 = t; \
495: } \
496: for (_i=low2; _i<high2; _i++) { \
497: if (rp2[_i] > col) break; \
498: if (rp2[_i] == col) { \
499: if (addv == ADD_VALUES) { \
500: ap2[_i] += value; \
501: (void)PetscLogFlops(1.0); \
502: } \
503: else ap2[_i] = value; \
504: inserted = PETSC_TRUE; \
505: goto b_noinsert; \
506: } \
507: } \
508: if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509: if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
510: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511: MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512: N = nrow2++ - 1; b->nz++; high2++; \
513: /* shift up all the later entries in this row */ \
514: PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);\
515: PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);\
516: rp2[_i] = col; \
517: ap2[_i] = value; \
518: B->nonzerostate++; \
519: b_noinsert: ; \
520: bilen[row] = nrow2; \
521: }
523: PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524: {
525: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
526: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
528: PetscInt l,*garray = mat->garray,diag;
531: /* code only works for square matrices A */
533: /* find size of row to the left of the diagonal part */
534: MatGetOwnershipRange(A,&diag,NULL);
535: row = row - diag;
536: for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537: if (garray[b->j[b->i[row]+l]] > diag) break;
538: }
539: PetscArraycpy(b->a+b->i[row],v,l);
541: /* diagonal part */
542: PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));
544: /* right of diagonal part */
545: PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);
546: #if defined(PETSC_HAVE_DEVICE)
547: if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548: #endif
549: return(0);
550: }
552: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553: {
554: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
555: PetscScalar value = 0.0;
557: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
558: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559: PetscBool roworiented = aij->roworiented;
561: /* Some Variables required in the macro */
562: Mat A = aij->A;
563: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
564: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565: MatScalar *aa = a->a;
566: PetscBool ignorezeroentries = a->ignorezeroentries;
567: Mat B = aij->B;
568: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
569: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570: MatScalar *ba = b->a;
571: /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572: * cannot use "#if defined" inside a macro. */
573: PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
575: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576: PetscInt nonew;
577: MatScalar *ap1,*ap2;
580: for (i=0; i<m; i++) {
581: if (im[i] < 0) continue;
582: if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
583: if (im[i] >= rstart && im[i] < rend) {
584: row = im[i] - rstart;
585: lastcol1 = -1;
586: rp1 = aj + ai[row];
587: ap1 = aa + ai[row];
588: rmax1 = aimax[row];
589: nrow1 = ailen[row];
590: low1 = 0;
591: high1 = nrow1;
592: lastcol2 = -1;
593: rp2 = bj + bi[row];
594: ap2 = ba + bi[row];
595: rmax2 = bimax[row];
596: nrow2 = bilen[row];
597: low2 = 0;
598: high2 = nrow2;
600: for (j=0; j<n; j++) {
601: if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
602: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
603: if (in[j] >= cstart && in[j] < cend) {
604: col = in[j] - cstart;
605: nonew = a->nonew;
606: MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
607: #if defined(PETSC_HAVE_DEVICE)
608: if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
609: #endif
610: } else if (in[j] < 0) continue;
611: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
612: else {
613: if (mat->was_assembled) {
614: if (!aij->colmap) {
615: MatCreateColmap_MPIAIJ_Private(mat);
616: }
617: #if defined(PETSC_USE_CTABLE)
618: PetscTableFind(aij->colmap,in[j]+1,&col);
619: col--;
620: #else
621: col = aij->colmap[in[j]] - 1;
622: #endif
623: if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
624: MatDisAssemble_MPIAIJ(mat);
625: col = in[j];
626: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
627: B = aij->B;
628: b = (Mat_SeqAIJ*)B->data;
629: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
630: rp2 = bj + bi[row];
631: ap2 = ba + bi[row];
632: rmax2 = bimax[row];
633: nrow2 = bilen[row];
634: low2 = 0;
635: high2 = nrow2;
636: bm = aij->B->rmap->n;
637: ba = b->a;
638: inserted = PETSC_FALSE;
639: } else if (col < 0) {
640: if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
641: PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);
642: } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
643: }
644: } else col = in[j];
645: nonew = b->nonew;
646: MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
647: #if defined(PETSC_HAVE_DEVICE)
648: if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
649: #endif
650: }
651: }
652: } else {
653: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
654: if (!aij->donotstash) {
655: mat->assembled = PETSC_FALSE;
656: if (roworiented) {
657: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
658: } else {
659: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
660: }
661: }
662: }
663: }
664: return(0);
665: }
667: /*
668: This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
669: The values in mat_i have to be sorted and the values in mat_j have to be sorted within each row (CSR-like).
670: No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
671: */
672: PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673: {
674: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
675: Mat A = aij->A; /* diagonal part of the matrix */
676: Mat B = aij->B; /* offdiagonal part of the matrix */
677: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
678: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
679: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
680: PetscInt *ailen = a->ilen,*aj = a->j;
681: PetscInt *bilen = b->ilen,*bj = b->j;
682: PetscInt am = aij->A->rmap->n,j;
683: PetscInt diag_so_far = 0,dnz;
684: PetscInt offd_so_far = 0,onz;
687: /* Iterate over all rows of the matrix */
688: for (j=0; j<am; j++) {
689: dnz = onz = 0;
690: /* Iterate over all non-zero columns of the current row */
691: for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692: /* If column is in the diagonal */
693: if (mat_j[col] >= cstart && mat_j[col] < cend) {
694: aj[diag_so_far++] = mat_j[col] - cstart;
695: dnz++;
696: } else { /* off-diagonal entries */
697: bj[offd_so_far++] = mat_j[col];
698: onz++;
699: }
700: }
701: ailen[j] = dnz;
702: bilen[j] = onz;
703: }
704: return(0);
705: }
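
A small worked example of the split performed above (values assumed for illustration): with cstart = 4 and cend = 8, a local row whose sorted global columns in mat_j are {1, 4, 6, 9} contributes {0, 2} (shifted by cstart) to the diagonal aj and {1, 9} (kept global) to the off-diagonal bj, so ailen and bilen for that row both become 2. The numeric variant below additionally copies the matching entries of mat_a into the a arrays of the two blocks.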
707: /*
708: This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
709: The values in mat_i have to be sorted and the values in mat_j have to be sorted within each row (CSR-like).
710: No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
711: Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
712: would not be true and the more complex MatSetValues_MPIAIJ has to be used.
713: */
714: PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715: {
716: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
717: Mat A = aij->A; /* diagonal part of the matrix */
718: Mat B = aij->B; /* offdiagonal part of the matrix */
719: Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
720: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
721: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
722: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723: PetscInt *ailen = a->ilen,*aj = a->j;
724: PetscInt *bilen = b->ilen,*bj = b->j;
725: PetscInt am = aij->A->rmap->n,j;
726: PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
727: PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
728: PetscScalar *aa = a->a,*ba = b->a;
731: /* Iterate over all rows of the matrix */
732: for (j=0; j<am; j++) {
733: dnz_row = onz_row = 0;
734: rowstart_offd = full_offd_i[j];
735: rowstart_diag = full_diag_i[j];
736: /* Iterate over all non-zero columns of the current row */
737: for (col=mat_i[j]; col<mat_i[j+1]; col++) {
738: /* If column is in the diagonal */
739: if (mat_j[col] >= cstart && mat_j[col] < cend) {
740: aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
741: aa[rowstart_diag+dnz_row] = mat_a[col];
742: dnz_row++;
743: } else { /* off-diagonal entries */
744: bj[rowstart_offd+onz_row] = mat_j[col];
745: ba[rowstart_offd+onz_row] = mat_a[col];
746: onz_row++;
747: }
748: }
749: ailen[j] = dnz_row;
750: bilen[j] = onz_row;
751: }
752: return(0);
753: }
755: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
756: {
757: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
759: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
760: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
763: for (i=0; i<m; i++) {
764: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
765: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
766: if (idxm[i] >= rstart && idxm[i] < rend) {
767: row = idxm[i] - rstart;
768: for (j=0; j<n; j++) {
769: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
770: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
771: if (idxn[j] >= cstart && idxn[j] < cend) {
772: col = idxn[j] - cstart;
773: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
774: } else {
775: if (!aij->colmap) {
776: MatCreateColmap_MPIAIJ_Private(mat);
777: }
778: #if defined(PETSC_USE_CTABLE)
779: PetscTableFind(aij->colmap,idxn[j]+1,&col);
780: col--;
781: #else
782: col = aij->colmap[idxn[j]] - 1;
783: #endif
784: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
785: else {
786: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
787: }
788: }
789: }
790: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
791: }
792: return(0);
793: }
795: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
796: {
797: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
799: PetscInt nstash,reallocs;
802: if (aij->donotstash || mat->nooffprocentries) return(0);
804: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
805: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
806: PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
807: return(0);
808: }
810: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
811: {
812: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
814: PetscMPIInt n;
815: PetscInt i,j,rstart,ncols,flg;
816: PetscInt *row,*col;
817: PetscBool other_disassembled;
818: PetscScalar *val;
820: /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
823: if (!aij->donotstash && !mat->nooffprocentries) {
824: while (1) {
825: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
826: if (!flg) break;
828: for (i=0; i<n;) {
829: /* Now identify the consecutive vals belonging to the same row */
830: for (j=i,rstart=row[j]; j<n; j++) {
831: if (row[j] != rstart) break;
832: }
833: if (j < n) ncols = j-i;
834: else ncols = n-i;
835: /* Now assemble all these values with a single function call */
836: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
837: i = j;
838: }
839: }
840: MatStashScatterEnd_Private(&mat->stash);
841: }
842: #if defined(PETSC_HAVE_DEVICE)
843: if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
844: /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
845: if (mat->boundtocpu) {
846: MatBindToCPU(aij->A,PETSC_TRUE);
847: MatBindToCPU(aij->B,PETSC_TRUE);
848: }
849: #endif
850: MatAssemblyBegin(aij->A,mode);
851: MatAssemblyEnd(aij->A,mode);
853: /* determine whether any processor has disassembled; if so, we must
854: also disassemble ourselves, in order that we may reassemble. */
855: /*
856: if the nonzero structure of submatrix B cannot change then we know that
857: no processor disassembled, and thus we can skip this step
858: */
859: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
860: MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));
861: if (mat->was_assembled && !other_disassembled) {
862: #if defined(PETSC_HAVE_DEVICE)
863: aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
864: #endif
865: MatDisAssemble_MPIAIJ(mat);
866: }
867: }
868: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
869: MatSetUpMultiply_MPIAIJ(mat);
870: }
871: MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);
872: #if defined(PETSC_HAVE_DEVICE)
873: if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
874: #endif
875: MatAssemblyBegin(aij->B,mode);
876: MatAssemblyEnd(aij->B,mode);
878: PetscFree2(aij->rowvalues,aij->rowindices);
880: aij->rowvalues = NULL;
882: VecDestroy(&aij->diag);
884: /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
885: if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
886: PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
887: MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));
888: }
889: #if defined(PETSC_HAVE_DEVICE)
890: mat->offloadmask = PETSC_OFFLOAD_BOTH;
891: #endif
892: return(0);
893: }
895: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
896: {
897: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
901: MatZeroEntries(l->A);
902: MatZeroEntries(l->B);
903: return(0);
904: }
906: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907: {
908: Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
909: PetscObjectState sA, sB;
910: PetscInt *lrows;
911: PetscInt r, len;
912: PetscBool cong, lch, gch;
913: PetscErrorCode ierr;
916: /* get locally owned rows */
917: MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);
918: MatHasCongruentLayouts(A,&cong);
919: /* fix right hand side if needed */
920: if (x && b) {
921: const PetscScalar *xx;
922: PetscScalar *bb;
924: if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
925: VecGetArrayRead(x, &xx);
926: VecGetArray(b, &bb);
927: for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
928: VecRestoreArrayRead(x, &xx);
929: VecRestoreArray(b, &bb);
930: }
932: sA = mat->A->nonzerostate;
933: sB = mat->B->nonzerostate;
935: if (diag != 0.0 && cong) {
936: MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);
937: MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);
938: } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
939: Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
940: Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
941: PetscInt nnwA, nnwB;
942: PetscBool nnzA, nnzB;
944: nnwA = aijA->nonew;
945: nnwB = aijB->nonew;
946: nnzA = aijA->keepnonzeropattern;
947: nnzB = aijB->keepnonzeropattern;
948: if (!nnzA) {
949: PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");
950: aijA->nonew = 0;
951: }
952: if (!nnzB) {
953: PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");
954: aijB->nonew = 0;
955: }
956: /* Must zero here before the next loop */
957: MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);
958: MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);
959: for (r = 0; r < len; ++r) {
960: const PetscInt row = lrows[r] + A->rmap->rstart;
961: if (row >= A->cmap->N) continue;
962: MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);
963: }
964: aijA->nonew = nnwA;
965: aijB->nonew = nnwB;
966: } else {
967: MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);
968: MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);
969: }
970: PetscFree(lrows);
971: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
972: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
974: /* reduce nonzerostate */
975: lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
976: MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));
977: if (gch) A->nonzerostate++;
978: return(0);
979: }
981: PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
982: {
983: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
984: PetscErrorCode ierr;
985: PetscMPIInt n = A->rmap->n;
986: PetscInt i,j,r,m,len = 0;
987: PetscInt *lrows,*owners = A->rmap->range;
988: PetscMPIInt p = 0;
989: PetscSFNode *rrows;
990: PetscSF sf;
991: const PetscScalar *xx;
992: PetscScalar *bb,*mask;
993: Vec xmask,lmask;
994: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data;
995: const PetscInt *aj, *ii,*ridx;
996: PetscScalar *aa;
999: /* Create SF where leaves are input rows and roots are owned rows */
1000: PetscMalloc1(n, &lrows);
1001: for (r = 0; r < n; ++r) lrows[r] = -1;
1002: PetscMalloc1(N, &rrows);
1003: for (r = 0; r < N; ++r) {
1004: const PetscInt idx = rows[r];
1005: if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1006: if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1007: PetscLayoutFindOwner(A->rmap,idx,&p);
1008: }
1009: rrows[r].rank = p;
1010: rrows[r].index = rows[r] - owners[p];
1011: }
1012: PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);
1013: PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);
1014: /* Collect flags for rows to be zeroed */
1015: PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
1016: PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
1017: PetscSFDestroy(&sf);
1018: /* Compress and put in row numbers */
1019: for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1020: /* zero diagonal part of matrix */
1021: MatZeroRowsColumns(l->A,len,lrows,diag,x,b);
1022: /* handle off diagonal part of matrix */
1023: MatCreateVecs(A,&xmask,NULL);
1024: VecDuplicate(l->lvec,&lmask);
1025: VecGetArray(xmask,&bb);
1026: for (i=0; i<len; i++) bb[lrows[i]] = 1;
1027: VecRestoreArray(xmask,&bb);
1028: VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1029: VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1030: VecDestroy(&xmask);
1031: if (x && b) { /* this code is buggy when the row and column layouts don't match */
1032: PetscBool cong;
1034: MatHasCongruentLayouts(A,&cong);
1035: if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1036: VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);
1037: VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);
1038: VecGetArrayRead(l->lvec,&xx);
1039: VecGetArray(b,&bb);
1040: }
1041: VecGetArray(lmask,&mask);
1042: /* remove zeroed rows of the off-diagonal matrix */
1043: ii = aij->i;
1044: for (i=0; i<len; i++) {
1045: PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);
1046: }
1047: /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1048: if (aij->compressedrow.use) {
1049: m = aij->compressedrow.nrows;
1050: ii = aij->compressedrow.i;
1051: ridx = aij->compressedrow.rindex;
1052: for (i=0; i<m; i++) {
1053: n = ii[i+1] - ii[i];
1054: aj = aij->j + ii[i];
1055: aa = aij->a + ii[i];
1057: for (j=0; j<n; j++) {
1058: if (PetscAbsScalar(mask[*aj])) {
1059: if (b) bb[*ridx] -= *aa*xx[*aj];
1060: *aa = 0.0;
1061: }
1062: aa++;
1063: aj++;
1064: }
1065: ridx++;
1066: }
1067: } else { /* do not use compressed row format */
1068: m = l->B->rmap->n;
1069: for (i=0; i<m; i++) {
1070: n = ii[i+1] - ii[i];
1071: aj = aij->j + ii[i];
1072: aa = aij->a + ii[i];
1073: for (j=0; j<n; j++) {
1074: if (PetscAbsScalar(mask[*aj])) {
1075: if (b) bb[i] -= *aa*xx[*aj];
1076: *aa = 0.0;
1077: }
1078: aa++;
1079: aj++;
1080: }
1081: }
1082: }
1083: if (x && b) {
1084: VecRestoreArray(b,&bb);
1085: VecRestoreArrayRead(l->lvec,&xx);
1086: }
1087: VecRestoreArray(lmask,&mask);
1088: VecDestroy(&lmask);
1089: PetscFree(lrows);
1091: /* only change matrix nonzero state if pattern was allowed to be changed */
1092: if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1093: PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1094: MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));
1095: }
1096: return(0);
1097: }
1099: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1100: {
1101: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1103: PetscInt nt;
1104: VecScatter Mvctx = a->Mvctx;
1107: VecGetLocalSize(xx,&nt);
1108: if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1109: VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1110: (*a->A->ops->mult)(a->A,xx,yy);
1111: VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1112: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1113: return(0);
1114: }
1116: PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1117: {
1118: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1122: MatMultDiagonalBlock(a->A,bb,xx);
1123: return(0);
1124: }
1126: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1127: {
1128: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1130: VecScatter Mvctx = a->Mvctx;
1133: if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1134: VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1135: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1136: VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1137: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1138: return(0);
1139: }
1141: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1142: {
1143: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1147: /* do nondiagonal part */
1148: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1149: /* do local part */
1150: (*a->A->ops->multtranspose)(a->A,xx,yy);
1151: /* add partial results together */
1152: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1153: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1154: return(0);
1155: }
1157: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
1158: {
1159: MPI_Comm comm;
1160: Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1161: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1162: IS Me,Notme;
1164: PetscInt M,N,first,last,*notme,i;
1165: PetscBool lf;
1166: PetscMPIInt size;
1169: /* Easy test: symmetric diagonal block */
1170: Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1171: MatIsTranspose(Adia,Bdia,tol,&lf);
1172: MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));
1173: if (!*f) return(0);
1174: PetscObjectGetComm((PetscObject)Amat,&comm);
1175: MPI_Comm_size(comm,&size);
1176: if (size == 1) return(0);
1178: /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1179: MatGetSize(Amat,&M,&N);
1180: MatGetOwnershipRange(Amat,&first,&last);
1181: PetscMalloc1(N-last+first,&notme);
1182: for (i=0; i<first; i++) notme[i] = i;
1183: for (i=last; i<M; i++) notme[i-last+first] = i;
1184: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);
1185: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
1186: MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
1187: Aoff = Aoffs[0];
1188: MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
1189: Boff = Boffs[0];
1190: MatIsTranspose(Aoff,Boff,tol,f);
1191: MatDestroyMatrices(1,&Aoffs);
1192: MatDestroyMatrices(1,&Boffs);
1193: ISDestroy(&Me);
1194: ISDestroy(&Notme);
1195: PetscFree(notme);
1196: return(0);
1197: }
1199: PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
1200: {
1204: MatIsTranspose_MPIAIJ(A,A,tol,f);
1205: return(0);
1206: }
1208: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1209: {
1210: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1214: /* do nondiagonal part */
1215: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1216: /* do local part */
1217: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1218: /* add partial results together */
1219: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1220: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1221: return(0);
1222: }
1224: /*
1225: This only works correctly for square matrices where the subblock A->A is the
1226: diagonal block
1227: */
1228: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1229: {
1231: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1234: if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1235: if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1236: MatGetDiagonal(a->A,v);
1237: return(0);
1238: }
1240: PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1241: {
1242: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1246: MatScale(a->A,aa);
1247: MatScale(a->B,aa);
1248: return(0);
1249: }
1251: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1252: {
1253: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1257: #if defined(PETSC_USE_LOG)
1258: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1259: #endif
1260: MatStashDestroy_Private(&mat->stash);
1261: VecDestroy(&aij->diag);
1262: MatDestroy(&aij->A);
1263: MatDestroy(&aij->B);
1264: #if defined(PETSC_USE_CTABLE)
1265: PetscTableDestroy(&aij->colmap);
1266: #else
1267: PetscFree(aij->colmap);
1268: #endif
1269: PetscFree(aij->garray);
1270: VecDestroy(&aij->lvec);
1271: VecScatterDestroy(&aij->Mvctx);
1272: if (aij->Mvctx_mpi1) {VecScatterDestroy(&aij->Mvctx_mpi1);}
1273: PetscFree2(aij->rowvalues,aij->rowindices);
1274: PetscFree(aij->ld);
1275: PetscFree(mat->data);
1277: /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1278: PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);
1280: PetscObjectChangeTypeName((PetscObject)mat,NULL);
1281: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);
1282: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);
1283: PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);
1284: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);
1285: PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);
1286: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);
1287: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);
1288: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);
1289: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);
1290: #if defined(PETSC_HAVE_CUDA)
1291: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);
1292: #endif
1293: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1294: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);
1295: #endif
1296: #if defined(PETSC_HAVE_ELEMENTAL)
1297: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);
1298: #endif
1299: #if defined(PETSC_HAVE_SCALAPACK)
1300: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);
1301: #endif
1302: #if defined(PETSC_HAVE_HYPRE)
1303: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);
1304: PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);
1305: #endif
1306: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);
1307: PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);
1308: PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);
1309: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);
1310: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);
1311: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);
1312: #if defined(PETSC_HAVE_MKL_SPARSE)
1313: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);
1314: #endif
1315: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);
1316: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);
1317: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);
1318: return(0);
1319: }
1321: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1322: {
1323: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1324: Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data;
1325: Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data;
1326: const PetscInt *garray = aij->garray;
1327: PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1328: PetscInt *rowlens;
1329: PetscInt *colidxs;
1330: PetscScalar *matvals;
1331: PetscErrorCode ierr;
1334: PetscViewerSetUp(viewer);
1336: M = mat->rmap->N;
1337: N = mat->cmap->N;
1338: m = mat->rmap->n;
1339: rs = mat->rmap->rstart;
1340: cs = mat->cmap->rstart;
1341: nz = A->nz + B->nz;
1343: /* write matrix header */
1344: header[0] = MAT_FILE_CLASSID;
1345: header[1] = M; header[2] = N; header[3] = nz;
1346: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1347: PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);
1349: /* fill in and store row lengths */
1350: PetscMalloc1(m,&rowlens);
1351: for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1352: PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);
1353: PetscFree(rowlens);
1355: /* fill in and store column indices */
1356: PetscMalloc1(nz,&colidxs);
1357: for (cnt=0, i=0; i<m; i++) {
1358: for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1359: if (garray[B->j[jb]] > cs) break;
1360: colidxs[cnt++] = garray[B->j[jb]];
1361: }
1362: for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1363: colidxs[cnt++] = A->j[ja] + cs;
1364: for (; jb<B->i[i+1]; jb++)
1365: colidxs[cnt++] = garray[B->j[jb]];
1366: }
1367: if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1368: PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);
1369: PetscFree(colidxs);
1371: /* fill in and store nonzero values */
1372: PetscMalloc1(nz,&matvals);
1373: for (cnt=0, i=0; i<m; i++) {
1374: for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1375: if (garray[B->j[jb]] > cs) break;
1376: matvals[cnt++] = B->a[jb];
1377: }
1378: for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1379: matvals[cnt++] = A->a[ja];
1380: for (; jb<B->i[i+1]; jb++)
1381: matvals[cnt++] = B->a[jb];
1382: }
1383: if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1384: PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);
1385: PetscFree(matvals);
1387: /* write block size option to the viewer's .info file */
1388: MatView_Binary_BlockSizes(mat,viewer);
1389: return(0);
1390: }
1392: #include <petscdraw.h>
1393: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1394: {
1395: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1396: PetscErrorCode ierr;
1397: PetscMPIInt rank = aij->rank,size = aij->size;
1398: PetscBool isdraw,iascii,isbinary;
1399: PetscViewer sviewer;
1400: PetscViewerFormat format;
1403: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1404: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1405: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1406: if (iascii) {
1407: PetscViewerGetFormat(viewer,&format);
1408: if (format == PETSC_VIEWER_LOAD_BALANCE) {
1409: PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1410: PetscMalloc1(size,&nz);
1411: MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
1412: for (i=0; i<(PetscInt)size; i++) {
1413: nmax = PetscMax(nmax,nz[i]);
1414: nmin = PetscMin(nmin,nz[i]);
1415: navg += nz[i];
1416: }
1417: PetscFree(nz);
1418: navg = navg/size;
1419: PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);
1420: return(0);
1421: }
1422: PetscViewerGetFormat(viewer,&format);
1423: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1424: MatInfo info;
1425: PetscBool inodes;
1427: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
1428: MatGetInfo(mat,MAT_LOCAL,&info);
1429: MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);
1430: PetscViewerASCIIPushSynchronized(viewer);
1431: if (!inodes) {
1432: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1433: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);
1434: } else {
1435: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1436: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);
1437: }
1438: MatGetInfo(aij->A,MAT_LOCAL,&info);
1439: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1440: MatGetInfo(aij->B,MAT_LOCAL,&info);
1441: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1442: PetscViewerFlush(viewer);
1443: PetscViewerASCIIPopSynchronized(viewer);
1444: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
1445: VecScatterView(aij->Mvctx,viewer);
1446: return(0);
1447: } else if (format == PETSC_VIEWER_ASCII_INFO) {
1448: PetscInt inodecount,inodelimit,*inodes;
1449: MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);
1450: if (inodes) {
1451: PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);
1452: } else {
1453: PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");
1454: }
1455: return(0);
1456: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1457: return(0);
1458: }
1459: } else if (isbinary) {
1460: if (size == 1) {
1461: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1462: MatView(aij->A,viewer);
1463: } else {
1464: MatView_MPIAIJ_Binary(mat,viewer);
1465: }
1466: return(0);
1467: } else if (iascii && size == 1) {
1468: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1469: MatView(aij->A,viewer);
1470: return(0);
1471: } else if (isdraw) {
1472: PetscDraw draw;
1473: PetscBool isnull;
1474: PetscViewerDrawGetDraw(viewer,0,&draw);
1475: PetscDrawIsNull(draw,&isnull);
1476: if (isnull) return(0);
1477: }
1479: { /* assemble the entire matrix onto first processor */
1480: Mat A = NULL, Av;
1481: IS isrow,iscol;
1483: ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);
1484: ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);
1485: MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);
1486: MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);
1487: /* The commented code uses MatCreateSubMatrices instead */
1488: /*
1489: Mat *AA, A = NULL, Av;
1490: IS isrow,iscol;
1492: ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);
1493: ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);
1494: MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);
1495: if (!rank) {
1496: PetscObjectReference((PetscObject)AA[0]);
1497: A = AA[0];
1498: Av = AA[0];
1499: }
1500: MatDestroySubMatrices(1,&AA);
1501: */
1502: ISDestroy(&iscol);
1503: ISDestroy(&isrow);
1504: /*
1505: Everyone has to make this call to draw the matrix since the graphics waits are
1506: synchronized across all processors that share the PetscDraw object
1507: */
1508: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
1509: if (!rank) {
1510: if (((PetscObject)mat)->name) {
1511: PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);
1512: }
1513: MatView_SeqAIJ(Av,sviewer);
1514: }
1515: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
1516: PetscViewerFlush(viewer);
1517: MatDestroy(&A);
1518: }
1519: return(0);
1520: }
1522: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1523: {
1525: PetscBool iascii,isdraw,issocket,isbinary;
1528: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1529: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1530: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1531: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1532: if (iascii || isdraw || isbinary || issocket) {
1533: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1534: }
1535: return(0);
1536: }
1538: PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1539: {
1540: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1542: Vec bb1 = NULL;
1543: PetscBool hasop;
1546: if (flag == SOR_APPLY_UPPER) {
1547: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1548: return(0);
1549: }
1551: if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1552: VecDuplicate(bb,&bb1);
1553: }
1555: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1556: if (flag & SOR_ZERO_INITIAL_GUESS) {
1557: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1558: its--;
1559: }
1561: while (its--) {
1562: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1563: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1565: /* update rhs: bb1 = bb - B*x */
1566: VecScale(mat->lvec,-1.0);
1567: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1569: /* local sweep */
1570: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);
1571: }
1572: } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1573: if (flag & SOR_ZERO_INITIAL_GUESS) {
1574: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1575: its--;
1576: }
1577: while (its--) {
1578: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1579: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1581: /* update rhs: bb1 = bb - B*x */
1582: VecScale(mat->lvec,-1.0);
1583: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1585: /* local sweep */
1586: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);
1587: }
1588: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1589: if (flag & SOR_ZERO_INITIAL_GUESS) {
1590: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1591: its--;
1592: }
1593: while (its--) {
1594: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1595: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1597: /* update rhs: bb1 = bb - B*x */
1598: VecScale(mat->lvec,-1.0);
1599: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1601: /* local sweep */
1602: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);
1603: }
1604: } else if (flag & SOR_EISENSTAT) {
1605: Vec xx1;
1607: VecDuplicate(bb,&xx1);
1608: (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);
1610: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1611: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1612: if (!mat->diag) {
1613: MatCreateVecs(matin,&mat->diag,NULL);
1614: MatGetDiagonal(matin,mat->diag);
1615: }
1616: MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);
1617: if (hasop) {
1618: MatMultDiagonalBlock(matin,xx,bb1);
1619: } else {
1620: VecPointwiseMult(bb1,mat->diag,xx);
1621: }
1622: VecAYPX(bb1,(omega-2.0)/omega,bb);
1624: MatMultAdd(mat->B,mat->lvec,bb1,bb1);
1626: /* local sweep */
1627: (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);
1628: VecAXPY(xx,1.0,xx1);
1629: VecDestroy(&xx1);
1630: } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1632: VecDestroy(&bb1);
1634: matin->factorerrortype = mat->A->factorerrortype;
1635: return(0);
1636: }
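/*
   Illustrative usage sketch for the routine above (the names A, b, x are placeholders, not
   from this file): MatSOR() runs SOR sweeps on the local diagonal block, folding the
   off-process coupling into the right-hand side as bb1 = bb - B*x before each sweep.

     Vec b,x;
     // one local symmetric sweep per process, starting from a zero initial guess
     MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);
*/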
1638: PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1639: {
1640: Mat aA,aB,Aperm;
1641: const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1642: PetscScalar *aa,*ba;
1643: PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1644: PetscSF rowsf,sf;
1645: IS parcolp = NULL;
1646: PetscBool done;
1650: MatGetLocalSize(A,&m,&n);
1651: ISGetIndices(rowp,&rwant);
1652: ISGetIndices(colp,&cwant);
1653: PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);
1655: /* Invert row permutation to find out where my rows should go */
1656: PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);
1657: PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);
1658: PetscSFSetFromOptions(rowsf);
1659: for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1660: PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);
1661: PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);
1663: /* Invert column permutation to find out where my columns should go */
1664: PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1665: PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);
1666: PetscSFSetFromOptions(sf);
1667: for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1668: PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);
1669: PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);
1670: PetscSFDestroy(&sf);
1672: ISRestoreIndices(rowp,&rwant);
1673: ISRestoreIndices(colp,&cwant);
1674: MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);
1676: /* Find out where my gcols should go */
1677: MatGetSize(aB,NULL,&ng);
1678: PetscMalloc1(ng,&gcdest);
1679: PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1680: PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);
1681: PetscSFSetFromOptions(sf);
1682: PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);
1683: PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);
1684: PetscSFDestroy(&sf);
1686: PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);
1687: MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);
1688: MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);
1689: for (i=0; i<m; i++) {
1690: PetscInt row = rdest[i];
1691: PetscMPIInt rowner;
1692: PetscLayoutFindOwner(A->rmap,row,&rowner);
1693: for (j=ai[i]; j<ai[i+1]; j++) {
1694: PetscInt col = cdest[aj[j]];
1695: PetscMPIInt cowner;
1696: PetscLayoutFindOwner(A->cmap,col,&cowner); /* Could build an index for the columns to eliminate this search */
1697: if (rowner == cowner) dnnz[i]++;
1698: else onnz[i]++;
1699: }
1700: for (j=bi[i]; j<bi[i+1]; j++) {
1701: PetscInt col = gcdest[bj[j]];
1702: PetscMPIInt cowner;
1703: PetscLayoutFindOwner(A->cmap,col,&cowner);
1704: if (rowner == cowner) dnnz[i]++;
1705: else onnz[i]++;
1706: }
1707: }
1708: PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);
1709: PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);
1710: PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);
1711: PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);
1712: PetscSFDestroy(&rowsf);
1714: MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);
1715: MatSeqAIJGetArray(aA,&aa);
1716: MatSeqAIJGetArray(aB,&ba);
1717: for (i=0; i<m; i++) {
1718: PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1719: PetscInt j0,rowlen;
1720: rowlen = ai[i+1] - ai[i];
1721: for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of local rows m, so insert the values in batches of at most m */
1722: for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1723: MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);
1724: }
1725: rowlen = bi[i+1] - bi[i];
1726: for (j0=j=0; j<rowlen; j0=j) {
1727: for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1728: MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);
1729: }
1730: }
1731: MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);
1732: MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);
1733: MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);
1734: MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);
1735: MatSeqAIJRestoreArray(aA,&aa);
1736: MatSeqAIJRestoreArray(aB,&ba);
1737: PetscFree4(dnnz,onnz,tdnnz,tonnz);
1738: PetscFree3(work,rdest,cdest);
1739: PetscFree(gcdest);
1740: if (parcolp) {ISDestroy(&colp);}
1741: *B = Aperm;
1742: return(0);
1743: }
1745: PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1746: {
1747: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1751: MatGetSize(aij->B,NULL,nghosts);
1752: if (ghosts) *ghosts = aij->garray;
1753: return(0);
1754: }
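/*
   Illustrative usage sketch for MatGetGhosts() (the name A is a placeholder): the returned
   ghost list is aij->garray, i.e. the global column indices of the off-diagonal block B.

     PetscInt       nghosts;
     const PetscInt *ghosts;
     MatGetGhosts(A,&nghosts,&ghosts);
     // ghosts[0..nghosts-1] are the global columns this process needs from other processes
*/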
1756: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1757: {
1758: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1759: Mat A = mat->A,B = mat->B;
1761: PetscLogDouble isend[5],irecv[5];
1764: info->block_size = 1.0;
1765: MatGetInfo(A,MAT_LOCAL,info);
1767: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1768: isend[3] = info->memory; isend[4] = info->mallocs;
1770: MatGetInfo(B,MAT_LOCAL,info);
1772: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1773: isend[3] += info->memory; isend[4] += info->mallocs;
1774: if (flag == MAT_LOCAL) {
1775: info->nz_used = isend[0];
1776: info->nz_allocated = isend[1];
1777: info->nz_unneeded = isend[2];
1778: info->memory = isend[3];
1779: info->mallocs = isend[4];
1780: } else if (flag == MAT_GLOBAL_MAX) {
1781: MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));
1783: info->nz_used = irecv[0];
1784: info->nz_allocated = irecv[1];
1785: info->nz_unneeded = irecv[2];
1786: info->memory = irecv[3];
1787: info->mallocs = irecv[4];
1788: } else if (flag == MAT_GLOBAL_SUM) {
1789: MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));
1791: info->nz_used = irecv[0];
1792: info->nz_allocated = irecv[1];
1793: info->nz_unneeded = irecv[2];
1794: info->memory = irecv[3];
1795: info->mallocs = irecv[4];
1796: }
1797: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1798: info->fill_ratio_needed = 0;
1799: info->factor_mallocs = 0;
1800: return(0);
1801: }
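/*
   Illustrative usage sketch for MatGetInfo() (the name A is a placeholder): the five
   quantities reduced above can be queried locally, as a global maximum, or as a global sum.

     MatInfo info;
     MatGetInfo(A,MAT_GLOBAL_SUM,&info);
     PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",
                 info.nz_used,info.nz_allocated,info.mallocs);
*/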
1803: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1804: {
1805: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1809: switch (op) {
1810: case MAT_NEW_NONZERO_LOCATIONS:
1811: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1812: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1813: case MAT_KEEP_NONZERO_PATTERN:
1814: case MAT_NEW_NONZERO_LOCATION_ERR:
1815: case MAT_USE_INODES:
1816: case MAT_IGNORE_ZERO_ENTRIES:
1817: MatCheckPreallocated(A,1);
1818: MatSetOption(a->A,op,flg);
1819: MatSetOption(a->B,op,flg);
1820: break;
1821: case MAT_ROW_ORIENTED:
1822: MatCheckPreallocated(A,1);
1823: a->roworiented = flg;
1825: MatSetOption(a->A,op,flg);
1826: MatSetOption(a->B,op,flg);
1827: break;
1828: case MAT_NEW_DIAGONALS:
1829: case MAT_SORTED_FULL:
1830: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1831: break;
1832: case MAT_IGNORE_OFF_PROC_ENTRIES:
1833: a->donotstash = flg;
1834: break;
1835: /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1836: case MAT_SPD:
1837: case MAT_SYMMETRIC:
1838: case MAT_STRUCTURALLY_SYMMETRIC:
1839: case MAT_HERMITIAN:
1840: case MAT_SYMMETRY_ETERNAL:
1841: break;
1842: case MAT_SUBMAT_SINGLEIS:
1843: A->submat_singleis = flg;
1844: break;
1845: case MAT_STRUCTURE_ONLY:
1846: /* The option is handled directly by MatSetOption() */
1847: break;
1848: default:
1849: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1850: }
1851: return(0);
1852: }
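/*
   Illustrative usage sketch for the options handled above (the name A is a placeholder):
   both calls below are either forwarded to the diagonal and off-diagonal blocks or stored
   in the MPIAIJ data structure.

     MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);     // do not stash values destined for other processes
     MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);  // error out instead of allocating new nonzeros
*/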
1854: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1855: {
1856: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1857: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1859: PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1860: PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1861: PetscInt *cmap,*idx_p;
1864: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1865: mat->getrowactive = PETSC_TRUE;
1867: if (!mat->rowvalues && (idx || v)) {
1868: /*
1869: allocate enough space to hold information from the longest row.
1870: */
1871: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1872: PetscInt max = 1,tmp;
1873: for (i=0; i<matin->rmap->n; i++) {
1874: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1875: if (max < tmp) max = tmp;
1876: }
1877: PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);
1878: }
1880: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1881: lrow = row - rstart;
1883: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1884: if (!v) {pvA = NULL; pvB = NULL;}
1885: if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1886: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1887: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1888: nztot = nzA + nzB;
1890: cmap = mat->garray;
1891: if (v || idx) {
1892: if (nztot) {
1893: /* Sort by increasing column numbers, assuming A and B already sorted */
1894: PetscInt imark = -1;
1895: if (v) {
1896: *v = v_p = mat->rowvalues;
1897: for (i=0; i<nzB; i++) {
1898: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1899: else break;
1900: }
1901: imark = i;
1902: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1903: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1904: }
1905: if (idx) {
1906: *idx = idx_p = mat->rowindices;
1907: if (imark > -1) {
1908: for (i=0; i<imark; i++) {
1909: idx_p[i] = cmap[cworkB[i]];
1910: }
1911: } else {
1912: for (i=0; i<nzB; i++) {
1913: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1914: else break;
1915: }
1916: imark = i;
1917: }
1918: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1919: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1920: }
1921: } else {
1922: if (idx) *idx = NULL;
1923: if (v) *v = NULL;
1924: }
1925: }
1926: *nz = nztot;
1927: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1928: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1929: return(0);
1930: }
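/*
   Illustrative usage sketch for MatGetRow()/MatRestoreRow() (the name A is a placeholder);
   only locally owned rows may be requested, as enforced above.

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     MatGetOwnershipRange(A,&rstart,&rend);
     for (row=rstart; row<rend; row++) {
       MatGetRow(A,row,&ncols,&cols,&vals);
       // use the global column indices cols[] and values vals[], merged and sorted as above
       MatRestoreRow(A,row,&ncols,&cols,&vals);
     }
*/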
1932: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1933: {
1934: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1937: if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1938: aij->getrowactive = PETSC_FALSE;
1939: return(0);
1940: }
1942: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1943: {
1944: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1945: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1947: PetscInt i,j,cstart = mat->cmap->rstart;
1948: PetscReal sum = 0.0;
1949: MatScalar *v;
1952: if (aij->size == 1) {
1953: MatNorm(aij->A,type,norm);
1954: } else {
1955: if (type == NORM_FROBENIUS) {
1956: v = amat->a;
1957: for (i=0; i<amat->nz; i++) {
1958: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1959: }
1960: v = bmat->a;
1961: for (i=0; i<bmat->nz; i++) {
1962: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1963: }
1964: MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
1965: *norm = PetscSqrtReal(*norm);
1966: PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);
1967: } else if (type == NORM_1) { /* max column norm */
1968: PetscReal *tmp,*tmp2;
1969: PetscInt *jj,*garray = aij->garray;
1970: PetscCalloc1(mat->cmap->N+1,&tmp);
1971: PetscMalloc1(mat->cmap->N+1,&tmp2);
1972: *norm = 0.0;
1973: v = amat->a; jj = amat->j;
1974: for (j=0; j<amat->nz; j++) {
1975: tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
1976: }
1977: v = bmat->a; jj = bmat->j;
1978: for (j=0; j<bmat->nz; j++) {
1979: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1980: }
1981: MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
1982: for (j=0; j<mat->cmap->N; j++) {
1983: if (tmp2[j] > *norm) *norm = tmp2[j];
1984: }
1985: PetscFree(tmp);
1986: PetscFree(tmp2);
1987: PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));
1988: } else if (type == NORM_INFINITY) { /* max row norm */
1989: PetscReal ntemp = 0.0;
1990: for (j=0; j<aij->A->rmap->n; j++) {
1991: v = amat->a + amat->i[j];
1992: sum = 0.0;
1993: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1994: sum += PetscAbsScalar(*v); v++;
1995: }
1996: v = bmat->a + bmat->i[j];
1997: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1998: sum += PetscAbsScalar(*v); v++;
1999: }
2000: if (sum > ntemp) ntemp = sum;
2001: }
2002: MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));
2003: PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));
2004: } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2005: }
2006: return(0);
2007: }
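/*
   Illustrative usage sketch for MatNorm() (the name A is a placeholder): NORM_FROBENIUS sums
   |a_ij|^2 over both blocks and reduces across processes, NORM_1 is the maximum column sum,
   and NORM_INFINITY is the maximum row sum, exactly as computed above.

     PetscReal nrm;
     MatNorm(A,NORM_FROBENIUS,&nrm);
*/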
2009: PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2010: {
2011: Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b;
2012: Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2013: PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2014: const PetscInt *ai,*aj,*bi,*bj,*B_diag_i;
2015: PetscErrorCode ierr;
2016: Mat B,A_diag,*B_diag;
2017: const MatScalar *array;
2020: ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2021: ai = Aloc->i; aj = Aloc->j;
2022: bi = Bloc->i; bj = Bloc->j;
2023: if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2024: PetscInt *d_nnz,*g_nnz,*o_nnz;
2025: PetscSFNode *oloc;
2026: PETSC_UNUSED PetscSF sf;
2028: PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);
2029: /* compute d_nnz for preallocation */
2030: PetscArrayzero(d_nnz,na);
2031: for (i=0; i<ai[ma]; i++) {
2032: d_nnz[aj[i]]++;
2033: }
2034: /* compute local off-diagonal contributions */
2035: PetscArrayzero(g_nnz,nb);
2036: for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2037: /* map those to global */
2038: PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
2039: PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);
2040: PetscSFSetFromOptions(sf);
2041: PetscArrayzero(o_nnz,na);
2042: PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);
2043: PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);
2044: PetscSFDestroy(&sf);
2046: MatCreate(PetscObjectComm((PetscObject)A),&B);
2047: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
2048: MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));
2049: MatSetType(B,((PetscObject)A)->type_name);
2050: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
2051: PetscFree4(d_nnz,o_nnz,g_nnz,oloc);
2052: } else {
2053: B = *matout;
2054: MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2055: }
2057: b = (Mat_MPIAIJ*)B->data;
2058: A_diag = a->A;
2059: B_diag = &b->A;
2060: sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data;
2061: A_diag_ncol = A_diag->cmap->N;
2062: B_diag_ilen = sub_B_diag->ilen;
2063: B_diag_i = sub_B_diag->i;
2065: /* Set ilen for diagonal of B */
2066: for (i=0; i<A_diag_ncol; i++) {
2067: B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2068: }
2070: /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2071: very quickly (i.e., without using MatSetValues()), because all writes are local. */
2072: MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);
2074: /* copy over the B part */
2075: PetscMalloc1(bi[mb],&cols);
2076: array = Bloc->a;
2077: row = A->rmap->rstart;
2078: for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2079: cols_tmp = cols;
2080: for (i=0; i<mb; i++) {
2081: ncol = bi[i+1]-bi[i];
2082: MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);
2083: row++;
2084: array += ncol; cols_tmp += ncol;
2085: }
2086: PetscFree(cols);
2088: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2089: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2090: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2091: *matout = B;
2092: } else {
2093: MatHeaderMerge(A,&B);
2094: }
2095: return(0);
2096: }
2098: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2099: {
2100: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2101: Mat a = aij->A,b = aij->B;
2103: PetscInt s1,s2,s3;
2106: MatGetLocalSize(mat,&s2,&s3);
2107: if (rr) {
2108: VecGetLocalSize(rr,&s1);
2109: if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2110: /* Overlap communication with computation. */
2111: VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2112: }
2113: if (ll) {
2114: VecGetLocalSize(ll,&s1);
2115: if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2116: (*b->ops->diagonalscale)(b,ll,NULL);
2117: }
2118: /* scale the diagonal block */
2119: (*a->ops->diagonalscale)(a,ll,rr);
2121: if (rr) {
2122: /* Do a scatter end and then right scale the off-diagonal block */
2123: VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2124: (*b->ops->diagonalscale)(b,NULL,aij->lvec);
2125: }
2126: return(0);
2127: }
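/*
   Illustrative usage sketch for MatDiagonalScale() (the names A, l, r are placeholders):
   it forms diag(l)*A*diag(r); the right scaling of the off-diagonal block waits for the
   scatter of r into lvec, as in the routine above. Either vector may be NULL.

     MatDiagonalScale(A,l,r);     // A <- diag(l) * A * diag(r)
     MatDiagonalScale(A,NULL,r);  // right scaling only
*/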
2129: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2130: {
2131: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2135: MatSetUnfactored(a->A);
2136: return(0);
2137: }
2139: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
2140: {
2141: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2142: Mat a,b,c,d;
2143: PetscBool flg;
2147: a = matA->A; b = matA->B;
2148: c = matB->A; d = matB->B;
2150: MatEqual(a,c,&flg);
2151: if (flg) {
2152: MatEqual(b,d,&flg);
2153: }
2154: MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
2155: return(0);
2156: }
2158: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2159: {
2161: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2162: Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data;
2165: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2166: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2167: /* because of the column compression in the off-process part of the matrix a->B,
2168: the number of columns in a->B and b->B may differ, hence we cannot call
2169: MatCopy() directly on the two parts. If needed, a copy more efficient than
2170: MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2171: and then copying the submatrices */
2172: MatCopy_Basic(A,B,str);
2173: } else {
2174: MatCopy(a->A,b->A,str);
2175: MatCopy(a->B,b->B,str);
2176: }
2177: PetscObjectStateIncrease((PetscObject)B);
2178: return(0);
2179: }
2181: PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2182: {
2186: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);
2187: return(0);
2188: }
2190: /*
2191: Computes the number of nonzeros per row needed for preallocation when X and Y
2192: have different nonzero structure.
2193: */
2194: PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2195: {
2196: PetscInt i,j,k,nzx,nzy;
2199: /* Set the number of nonzeros in the new matrix */
2200: for (i=0; i<m; i++) {
2201: const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2202: nzx = xi[i+1] - xi[i];
2203: nzy = yi[i+1] - yi[i];
2204: nnz[i] = 0;
2205: for (j=0,k=0; j<nzx; j++) { /* Point in X */
2206: for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2207: if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */
2208: nnz[i]++;
2209: }
2210: for (; k<nzy; k++) nnz[i]++;
2211: }
2212: return(0);
2213: }
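/*
   Worked example for the merge count above (illustrative only): for a row whose global X
   columns are {1,4,7} and global Y columns are {2,4,9}, the loop counts X's column 1, catches
   up over Y's column 2, counts the shared column 4 once, counts X's column 7, and the final
   loop adds Y's trailing column 9, giving nnz = 5 = |{1,2,4,7,9}|, the size of the union.
*/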
2215: /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2216: static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2217: {
2219: PetscInt m = Y->rmap->N;
2220: Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data;
2221: Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data;
2224: MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);
2225: return(0);
2226: }
2228: PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2229: {
2231: Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2232: PetscBLASInt bnz,one=1;
2233: Mat_SeqAIJ *x,*y;
2236: if (str == SAME_NONZERO_PATTERN) {
2237: PetscScalar alpha = a;
2238: x = (Mat_SeqAIJ*)xx->A->data;
2239: PetscBLASIntCast(x->nz,&bnz);
2240: y = (Mat_SeqAIJ*)yy->A->data;
2241: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2242: x = (Mat_SeqAIJ*)xx->B->data;
2243: y = (Mat_SeqAIJ*)yy->B->data;
2244: PetscBLASIntCast(x->nz,&bnz);
2245: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2246: PetscObjectStateIncrease((PetscObject)Y);
2247: /* the MatAXPY_Basic* subroutines call MatAssemblyBegin()/MatAssemblyEnd(), so the matrix
2248: on the GPU will be updated */
2249: #if defined(PETSC_HAVE_DEVICE)
2250: if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2251: Y->offloadmask = PETSC_OFFLOAD_CPU;
2252: }
2253: #endif
2254: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2255: MatAXPY_Basic(Y,a,X,str);
2256: } else {
2257: Mat B;
2258: PetscInt *nnz_d,*nnz_o;
2259: PetscMalloc1(yy->A->rmap->N,&nnz_d);
2260: PetscMalloc1(yy->B->rmap->N,&nnz_o);
2261: MatCreate(PetscObjectComm((PetscObject)Y),&B);
2262: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2263: MatSetLayouts(B,Y->rmap,Y->cmap);
2264: MatSetType(B,MATMPIAIJ);
2265: MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);
2266: MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);
2267: MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);
2268: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2269: MatHeaderReplace(Y,&B);
2270: PetscFree(nnz_d);
2271: PetscFree(nnz_o);
2272: }
2273: return(0);
2274: }
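/*
   Illustrative usage sketch for MatAXPY() (the names Y, X are placeholders): the MatStructure
   argument selects the branch taken above.

     MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);      // direct BLAS axpy on both blocks
     MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);    // X's nonzeros are a subset of Y's
     MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN); // preallocates and builds a merged matrix
*/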
2276: extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2278: PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2279: {
2280: #if defined(PETSC_USE_COMPLEX)
2282: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2285: MatConjugate_SeqAIJ(aij->A);
2286: MatConjugate_SeqAIJ(aij->B);
2287: #else
2289: #endif
2290: return(0);
2291: }
2293: PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2294: {
2295: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2299: MatRealPart(a->A);
2300: MatRealPart(a->B);
2301: return(0);
2302: }
2304: PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2305: {
2306: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2310: MatImaginaryPart(a->A);
2311: MatImaginaryPart(a->B);
2312: return(0);
2313: }
2315: PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2316: {
2317: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2318: PetscErrorCode ierr;
2319: PetscInt i,*idxb = NULL,m = A->rmap->n;
2320: PetscScalar *va,*vv;
2321: Vec vB,vA;
2322: const PetscScalar *vb;
2325: VecCreateSeq(PETSC_COMM_SELF,m,&vA);
2326: MatGetRowMaxAbs(a->A,vA,idx);
2328: VecGetArrayWrite(vA,&va);
2329: if (idx) {
2330: for (i=0; i<m; i++) {
2331: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2332: }
2333: }
2335: VecCreateSeq(PETSC_COMM_SELF,m,&vB);
2336: PetscMalloc1(m,&idxb);
2337: MatGetRowMaxAbs(a->B,vB,idxb);
2339: VecGetArrayWrite(v,&vv);
2340: VecGetArrayRead(vB,&vb);
2341: for (i=0; i<m; i++) {
2342: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2343: vv[i] = vb[i];
2344: if (idx) idx[i] = a->garray[idxb[i]];
2345: } else {
2346: vv[i] = va[i];
2347: if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2348: idx[i] = a->garray[idxb[i]];
2349: }
2350: }
2351: VecRestoreArrayWrite(v,&vv);
2352: VecRestoreArrayWrite(vA,&va);
2353: VecRestoreArrayRead(vB,&vb);
2354: PetscFree(idxb);
2355: VecDestroy(&vA);
2356: VecDestroy(&vB);
2357: return(0);
2358: }
2360: PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2361: {
2362: Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2363: PetscInt m = A->rmap->n,n = A->cmap->n;
2364: PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend;
2365: PetscInt *cmap = mat->garray;
2366: PetscInt *diagIdx, *offdiagIdx;
2367: Vec diagV, offdiagV;
2368: PetscScalar *a, *diagA, *offdiagA, *ba;
2369: PetscInt r,j,col,ncols,*bi,*bj;
2371: Mat B = mat->B;
2372: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
2375: /* When a process holds entire A and other processes have no entry */
2376: if (A->cmap->N == n) {
2377: VecGetArrayWrite(v,&diagA);
2378: VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);
2379: MatGetRowMinAbs(mat->A,diagV,idx);
2380: VecDestroy(&diagV);
2381: VecRestoreArrayWrite(v,&diagA);
2382: return(0);
2383: } else if (n == 0) {
2384: if (m) {
2385: VecGetArrayWrite(v,&a);
2386: for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2387: VecRestoreArrayWrite(v,&a);
2388: }
2389: return(0);
2390: }
2392: PetscMalloc2(m,&diagIdx,m,&offdiagIdx);
2393: VecCreateSeq(PETSC_COMM_SELF, m, &diagV);
2394: VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);
2395: MatGetRowMinAbs(mat->A, diagV, diagIdx);
2397: /* Get offdiagIdx[] for implicit 0.0 */
2398: ba = b->a;
2399: bi = b->i;
2400: bj = b->j;
2401: VecGetArrayWrite(offdiagV, &offdiagA);
2402: for (r = 0; r < m; r++) {
2403: ncols = bi[r+1] - bi[r];
2404: if (ncols == A->cmap->N - n) { /* Brow is dense */
2405: offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2406: } else { /* Brow is sparse, so there is at least one implicit 0.0 and the off-diagonal row min in absolute value is 0.0 */
2407: offdiagA[r] = 0.0;
2409: /* Find first hole in the cmap */
2410: for (j=0; j<ncols; j++) {
2411: col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2412: if (col > j && j < cstart) {
2413: offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2414: break;
2415: } else if (col > j + n && j >= cstart) {
2416: offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2417: break;
2418: }
2419: }
2420: if (j == ncols && ncols < A->cmap->N - n) {
2421: /* a hole is outside compressed Bcols */
2422: if (ncols == 0) {
2423: if (cstart) {
2424: offdiagIdx[r] = 0;
2425: } else offdiagIdx[r] = cend;
2426: } else { /* ncols > 0 */
2427: offdiagIdx[r] = cmap[ncols-1] + 1;
2428: if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2429: }
2430: }
2431: }
2433: for (j=0; j<ncols; j++) {
2434: if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2435: ba++; bj++;
2436: }
2437: }
2439: VecGetArrayWrite(v, &a);
2440: VecGetArrayRead(diagV, (const PetscScalar**)&diagA);
2441: for (r = 0; r < m; ++r) {
2442: if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2443: a[r] = diagA[r];
2444: if (idx) idx[r] = cstart + diagIdx[r];
2445: } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2446: a[r] = diagA[r];
2447: if (idx) {
2448: if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2449: idx[r] = cstart + diagIdx[r];
2450: } else idx[r] = offdiagIdx[r];
2451: }
2452: } else {
2453: a[r] = offdiagA[r];
2454: if (idx) idx[r] = offdiagIdx[r];
2455: }
2456: }
2457: VecRestoreArrayWrite(v, &a);
2458: VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);
2459: VecRestoreArrayWrite(offdiagV, &offdiagA);
2460: VecDestroy(&diagV);
2461: VecDestroy(&offdiagV);
2462: PetscFree2(diagIdx, offdiagIdx);
2463: return(0);
2464: }
2466: PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2467: {
2468: Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2469: PetscInt m = A->rmap->n,n = A->cmap->n;
2470: PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend;
2471: PetscInt *cmap = mat->garray;
2472: PetscInt *diagIdx, *offdiagIdx;
2473: Vec diagV, offdiagV;
2474: PetscScalar *a, *diagA, *offdiagA, *ba;
2475: PetscInt r,j,col,ncols,*bi,*bj;
2477: Mat B = mat->B;
2478: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
2481: /* When a process holds entire A and other processes have no entry */
2482: if (A->cmap->N == n) {
2483: VecGetArrayWrite(v,&diagA);
2484: VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);
2485: MatGetRowMin(mat->A,diagV,idx);
2486: VecDestroy(&diagV);
2487: VecRestoreArrayWrite(v,&diagA);
2488: return(0);
2489: } else if (n == 0) {
2490: if (m) {
2491: VecGetArrayWrite(v,&a);
2492: for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2493: VecRestoreArrayWrite(v,&a);
2494: }
2495: return(0);
2496: }
2498: PetscCalloc2(m,&diagIdx,m,&offdiagIdx);
2499: VecCreateSeq(PETSC_COMM_SELF, m, &diagV);
2500: VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);
2501: MatGetRowMin(mat->A, diagV, diagIdx);
2503: /* Get offdiagIdx[] for implicit 0.0 */
2504: ba = b->a;
2505: bi = b->i;
2506: bj = b->j;
2507: VecGetArrayWrite(offdiagV, &offdiagA);
2508: for (r = 0; r < m; r++) {
2509: ncols = bi[r+1] - bi[r];
2510: if (ncols == A->cmap->N - n) { /* Brow is dense */
2511: offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2512: } else { /* Brow is sparse, so there is at least one implicit 0.0 and the off-diagonal row min is at most 0.0 */
2513: offdiagA[r] = 0.0;
2515: /* Find first hole in the cmap */
2516: for (j=0; j<ncols; j++) {
2517: col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2518: if (col > j && j < cstart) {
2519: offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2520: break;
2521: } else if (col > j + n && j >= cstart) {
2522: offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2523: break;
2524: }
2525: }
2526: if (j == ncols && ncols < A->cmap->N - n) {
2527: /* a hole is outside compressed Bcols */
2528: if (ncols == 0) {
2529: if (cstart) {
2530: offdiagIdx[r] = 0;
2531: } else offdiagIdx[r] = cend;
2532: } else { /* ncols > 0 */
2533: offdiagIdx[r] = cmap[ncols-1] + 1;
2534: if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2535: }
2536: }
2537: }
2539: for (j=0; j<ncols; j++) {
2540: if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2541: ba++; bj++;
2542: }
2543: }
2545: VecGetArrayWrite(v, &a);
2546: VecGetArrayRead(diagV, (const PetscScalar**)&diagA);
2547: for (r = 0; r < m; ++r) {
2548: if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2549: a[r] = diagA[r];
2550: if (idx) idx[r] = cstart + diagIdx[r];
2551: } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2552: a[r] = diagA[r];
2553: if (idx) {
2554: if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2555: idx[r] = cstart + diagIdx[r];
2556: } else idx[r] = offdiagIdx[r];
2557: }
2558: } else {
2559: a[r] = offdiagA[r];
2560: if (idx) idx[r] = offdiagIdx[r];
2561: }
2562: }
2563: VecRestoreArrayWrite(v, &a);
2564: VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);
2565: VecRestoreArrayWrite(offdiagV, &offdiagA);
2566: VecDestroy(&diagV);
2567: VecDestroy(&offdiagV);
2568: PetscFree2(diagIdx, offdiagIdx);
2569: return(0);
2570: }
2572: PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2573: {
2574: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
2575: PetscInt m = A->rmap->n,n = A->cmap->n;
2576: PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend;
2577: PetscInt *cmap = mat->garray;
2578: PetscInt *diagIdx, *offdiagIdx;
2579: Vec diagV, offdiagV;
2580: PetscScalar *a, *diagA, *offdiagA, *ba;
2581: PetscInt r,j,col,ncols,*bi,*bj;
2583: Mat B = mat->B;
2584: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
2587: /* When a process holds entire A and other processes have no entry */
2588: if (A->cmap->N == n) {
2589: VecGetArrayWrite(v,&diagA);
2590: VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);
2591: MatGetRowMax(mat->A,diagV,idx);
2592: VecDestroy(&diagV);
2593: VecRestoreArrayWrite(v,&diagA);
2594: return(0);
2595: } else if (n == 0) {
2596: if (m) {
2597: VecGetArrayWrite(v,&a);
2598: for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2599: VecRestoreArrayWrite(v,&a);
2600: }
2601: return(0);
2602: }
2604: PetscMalloc2(m,&diagIdx,m,&offdiagIdx);
2605: VecCreateSeq(PETSC_COMM_SELF, m, &diagV);
2606: VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);
2607: MatGetRowMax(mat->A, diagV, diagIdx);
2609: /* Get offdiagIdx[] for implicit 0.0 */
2610: ba = b->a;
2611: bi = b->i;
2612: bj = b->j;
2613: VecGetArrayWrite(offdiagV, &offdiagA);
2614: for (r = 0; r < m; r++) {
2615: ncols = bi[r+1] - bi[r];
2616: if (ncols == A->cmap->N - n) { /* Brow is dense */
2617: offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2618: } else { /* Brow is sparse, so we already know the off-diagonal row max is at least 0.0 */
2619: offdiagA[r] = 0.0;
2621: /* Find first hole in the cmap */
2622: for (j=0; j<ncols; j++) {
2623: col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2624: if (col > j && j < cstart) {
2625: offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2626: break;
2627: } else if (col > j + n && j >= cstart) {
2628: offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2629: break;
2630: }
2631: }
2632: if (j == ncols && ncols < A->cmap->N - n) {
2633: /* a hole is outside compressed Bcols */
2634: if (ncols == 0) {
2635: if (cstart) {
2636: offdiagIdx[r] = 0;
2637: } else offdiagIdx[r] = cend;
2638: } else { /* ncols > 0 */
2639: offdiagIdx[r] = cmap[ncols-1] + 1;
2640: if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2641: }
2642: }
2643: }
2645: for (j=0; j<ncols; j++) {
2646: if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2647: ba++; bj++;
2648: }
2649: }
2651: VecGetArrayWrite(v, &a);
2652: VecGetArrayRead(diagV,(const PetscScalar**)&diagA);
2653: for (r = 0; r < m; ++r) {
2654: if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2655: a[r] = diagA[r];
2656: if (idx) idx[r] = cstart + diagIdx[r];
2657: } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2658: a[r] = diagA[r];
2659: if (idx) {
2660: if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2661: idx[r] = cstart + diagIdx[r];
2662: } else idx[r] = offdiagIdx[r];
2663: }
2664: } else {
2665: a[r] = offdiagA[r];
2666: if (idx) idx[r] = offdiagIdx[r];
2667: }
2668: }
2669: VecRestoreArrayWrite(v, &a);
2670: VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);
2671: VecRestoreArrayWrite(offdiagV,&offdiagA);
2672: VecDestroy(&diagV);
2673: VecDestroy(&offdiagV);
2674: PetscFree2(diagIdx, offdiagIdx);
2675: return(0);
2676: }
2678: PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2679: {
2681: Mat *dummy;
2684: MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);
2685: *newmat = *dummy;
2686: PetscFree(dummy);
2687: return(0);
2688: }
2690: PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2691: {
2692: Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
2696: MatInvertBlockDiagonal(a->A,values);
2697: A->factorerrortype = a->A->factorerrortype;
2698: return(0);
2699: }
2701: static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2702: {
2704: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data;
2707: if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2708: MatSetRandom(aij->A,rctx);
2709: if (x->assembled) {
2710: MatSetRandom(aij->B,rctx);
2711: } else {
2712: MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);
2713: }
2714: MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);
2715: MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);
2716: return(0);
2717: }
2719: PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2720: {
2722: if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2723: else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2724: return(0);
2725: }
2727: /*@
2728: MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2730: Collective on Mat
2732: Input Parameters:
2733: + A - the matrix
2734: - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2736: Level: advanced
2738: @*/
2739: PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2740: {
2741: PetscErrorCode ierr;
2744: PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
2745: return(0);
2746: }
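/*
   Illustrative usage sketch (the name A is a placeholder): the same switch is exposed on the
   options database through MatSetFromOptions_MPIAIJ() below.

     MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);
     // or, equivalently, pass -mat_increase_overlap_scalable on the command line
*/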
2748: PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2749: {
2750: PetscErrorCode ierr;
2751: PetscBool sc = PETSC_FALSE,flg;
2754: PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");
2755: if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2756: PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);
2757: if (flg) {
2758: MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);
2759: }
2760: PetscOptionsTail();
2761: return(0);
2762: }
2764: PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2765: {
2767: Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data;
2768: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data;
2771: if (!Y->preallocated) {
2772: MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);
2773: } else if (!aij->nz) {
2774: PetscInt nonew = aij->nonew;
2775: MatSeqAIJSetPreallocation(maij->A,1,NULL);
2776: aij->nonew = nonew;
2777: }
2778: MatShift_Basic(Y,a);
2779: return(0);
2780: }
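/*
   Illustrative usage sketch for MatShift() (the name Y is a placeholder): Y <- Y + a*I.
   The preallocation above guarantees room for one diagonal entry per row when Y is empty.

     MatShift(Y,1.0);  // add the identity to Y
*/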
2782: PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
2783: {
2784: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2788: if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2789: MatMissingDiagonal(a->A,missing,d);
2790: if (d) {
2791: PetscInt rstart;
2792: MatGetOwnershipRange(A,&rstart,NULL);
2793: *d += rstart;
2795: }
2796: return(0);
2797: }
2799: PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2800: {
2801: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2805: MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);
2806: return(0);
2807: }
2809: /* -------------------------------------------------------------------*/
2810: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2811: MatGetRow_MPIAIJ,
2812: MatRestoreRow_MPIAIJ,
2813: MatMult_MPIAIJ,
2814: /* 4*/ MatMultAdd_MPIAIJ,
2815: MatMultTranspose_MPIAIJ,
2816: MatMultTransposeAdd_MPIAIJ,
2817: NULL,
2818: NULL,
2819: NULL,
2820: /*10*/ NULL,
2821: NULL,
2822: NULL,
2823: MatSOR_MPIAIJ,
2824: MatTranspose_MPIAIJ,
2825: /*15*/ MatGetInfo_MPIAIJ,
2826: MatEqual_MPIAIJ,
2827: MatGetDiagonal_MPIAIJ,
2828: MatDiagonalScale_MPIAIJ,
2829: MatNorm_MPIAIJ,
2830: /*20*/ MatAssemblyBegin_MPIAIJ,
2831: MatAssemblyEnd_MPIAIJ,
2832: MatSetOption_MPIAIJ,
2833: MatZeroEntries_MPIAIJ,
2834: /*24*/ MatZeroRows_MPIAIJ,
2835: NULL,
2836: NULL,
2837: NULL,
2838: NULL,
2839: /*29*/ MatSetUp_MPIAIJ,
2840: NULL,
2841: NULL,
2842: MatGetDiagonalBlock_MPIAIJ,
2843: NULL,
2844: /*34*/ MatDuplicate_MPIAIJ,
2845: NULL,
2846: NULL,
2847: NULL,
2848: NULL,
2849: /*39*/ MatAXPY_MPIAIJ,
2850: MatCreateSubMatrices_MPIAIJ,
2851: MatIncreaseOverlap_MPIAIJ,
2852: MatGetValues_MPIAIJ,
2853: MatCopy_MPIAIJ,
2854: /*44*/ MatGetRowMax_MPIAIJ,
2855: MatScale_MPIAIJ,
2856: MatShift_MPIAIJ,
2857: MatDiagonalSet_MPIAIJ,
2858: MatZeroRowsColumns_MPIAIJ,
2859: /*49*/ MatSetRandom_MPIAIJ,
2860: NULL,
2861: NULL,
2862: NULL,
2863: NULL,
2864: /*54*/ MatFDColoringCreate_MPIXAIJ,
2865: NULL,
2866: MatSetUnfactored_MPIAIJ,
2867: MatPermute_MPIAIJ,
2868: NULL,
2869: /*59*/ MatCreateSubMatrix_MPIAIJ,
2870: MatDestroy_MPIAIJ,
2871: MatView_MPIAIJ,
2872: NULL,
2873: NULL,
2874: /*64*/ NULL,
2875: MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2876: NULL,
2877: NULL,
2878: NULL,
2879: /*69*/ MatGetRowMaxAbs_MPIAIJ,
2880: MatGetRowMinAbs_MPIAIJ,
2881: NULL,
2882: NULL,
2883: NULL,
2884: NULL,
2885: /*75*/ MatFDColoringApply_AIJ,
2886: MatSetFromOptions_MPIAIJ,
2887: NULL,
2888: NULL,
2889: MatFindZeroDiagonals_MPIAIJ,
2890: /*80*/ NULL,
2891: NULL,
2892: NULL,
2893: /*83*/ MatLoad_MPIAIJ,
2894: MatIsSymmetric_MPIAIJ,
2895: NULL,
2896: NULL,
2897: NULL,
2898: NULL,
2899: /*89*/ NULL,
2900: NULL,
2901: MatMatMultNumeric_MPIAIJ_MPIAIJ,
2902: NULL,
2903: NULL,
2904: /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2905: NULL,
2906: NULL,
2907: NULL,
2908: MatBindToCPU_MPIAIJ,
2909: /*99*/ MatProductSetFromOptions_MPIAIJ,
2910: NULL,
2911: NULL,
2912: MatConjugate_MPIAIJ,
2913: NULL,
2914: /*104*/MatSetValuesRow_MPIAIJ,
2915: MatRealPart_MPIAIJ,
2916: MatImaginaryPart_MPIAIJ,
2917: NULL,
2918: NULL,
2919: /*109*/NULL,
2920: NULL,
2921: MatGetRowMin_MPIAIJ,
2922: NULL,
2923: MatMissingDiagonal_MPIAIJ,
2924: /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2925: NULL,
2926: MatGetGhosts_MPIAIJ,
2927: NULL,
2928: NULL,
2929: /*119*/MatMultDiagonalBlock_MPIAIJ,
2930: NULL,
2931: NULL,
2932: NULL,
2933: MatGetMultiProcBlock_MPIAIJ,
2934: /*124*/MatFindNonzeroRows_MPIAIJ,
2935: MatGetColumnNorms_MPIAIJ,
2936: MatInvertBlockDiagonal_MPIAIJ,
2937: MatInvertVariableBlockDiagonal_MPIAIJ,
2938: MatCreateSubMatricesMPI_MPIAIJ,
2939: /*129*/NULL,
2940: NULL,
2941: NULL,
2942: MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2943: NULL,
2944: /*134*/NULL,
2945: NULL,
2946: NULL,
2947: NULL,
2948: NULL,
2949: /*139*/MatSetBlockSizes_MPIAIJ,
2950: NULL,
2951: NULL,
2952: MatFDColoringSetUp_MPIXAIJ,
2953: MatFindOffBlockDiagonalEntries_MPIAIJ,
2954: MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2955: /*145*/NULL,
2956: NULL,
2957: NULL
2958: };
2960: /* ----------------------------------------------------------------------------------------*/
2962: PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2963: {
2964: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2968: MatStoreValues(aij->A);
2969: MatStoreValues(aij->B);
2970: return(0);
2971: }
2973: PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2974: {
2975: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2979: MatRetrieveValues(aij->A);
2980: MatRetrieveValues(aij->B);
2981: return(0);
2982: }
2984: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2985: {
2986: Mat_MPIAIJ *b;
2988: PetscMPIInt size;
2991: PetscLayoutSetUp(B->rmap);
2992: PetscLayoutSetUp(B->cmap);
2993: b = (Mat_MPIAIJ*)B->data;
2995: #if defined(PETSC_USE_CTABLE)
2996: PetscTableDestroy(&b->colmap);
2997: #else
2998: PetscFree(b->colmap);
2999: #endif
3000: PetscFree(b->garray);
3001: VecDestroy(&b->lvec);
3002: VecScatterDestroy(&b->Mvctx);
3004: /* Because the off-diagonal block b->B may have been resized we simply destroy it and create a new one each time */
3005: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
3006: MatDestroy(&b->B);
3007: MatCreate(PETSC_COMM_SELF,&b->B);
3008: MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);
3009: MatSetBlockSizesFromMats(b->B,B,B);
3010: MatSetType(b->B,MATSEQAIJ);
3011: PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);
3013: if (!B->preallocated) {
3014: MatCreate(PETSC_COMM_SELF,&b->A);
3015: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
3016: MatSetBlockSizesFromMats(b->A,B,B);
3017: MatSetType(b->A,MATSEQAIJ);
3018: PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);
3019: }
3021: MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
3022: MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
3023: B->preallocated = PETSC_TRUE;
3024: B->was_assembled = PETSC_FALSE;
3025: B->assembled = PETSC_FALSE;
3026: return(0);
3027: }
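/*
   Illustrative usage sketch for preallocating an AIJ matrix (all sizes below are placeholders):
   d_nz/d_nnz describe the diagonal block A (columns owned by this process) and o_nz/o_nnz the
   off-diagonal block B.

     Mat C;
     MatCreate(PETSC_COMM_WORLD,&C);
     MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(C,MATAIJ);
     MatMPIAIJSetPreallocation(C,5,NULL,2,NULL); // 5 nonzeros per row in the diagonal block, 2 in the off-diagonal block
     MatSeqAIJSetPreallocation(C,5,NULL);        // so the same code also works on a single process
*/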
3029: PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
3030: {
3031: Mat_MPIAIJ *b;
3036: PetscLayoutSetUp(B->rmap);
3037: PetscLayoutSetUp(B->cmap);
3038: b = (Mat_MPIAIJ*)B->data;
3040: #if defined(PETSC_USE_CTABLE)
3041: PetscTableDestroy(&b->colmap);
3042: #else
3043: PetscFree(b->colmap);
3044: #endif
3045: PetscFree(b->garray);
3046: VecDestroy(&b->lvec);
3047: VecScatterDestroy(&b->Mvctx);
3049: MatResetPreallocation(b->A);
3050: MatResetPreallocation(b->B);
3051: B->preallocated = PETSC_TRUE;
3052: B->was_assembled = PETSC_FALSE;
3053: B->assembled = PETSC_FALSE;
3054: return(0);
3055: }
3057: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3058: {
3059: Mat mat;
3060: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3064: *newmat = NULL;
3065: MatCreate(PetscObjectComm((PetscObject)matin),&mat);
3066: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
3067: MatSetBlockSizesFromMats(mat,matin,matin);
3068: MatSetType(mat,((PetscObject)matin)->type_name);
3069: a = (Mat_MPIAIJ*)mat->data;
3071: mat->factortype = matin->factortype;
3072: mat->assembled = matin->assembled;
3073: mat->insertmode = NOT_SET_VALUES;
3074: mat->preallocated = matin->preallocated;
3076: a->size = oldmat->size;
3077: a->rank = oldmat->rank;
3078: a->donotstash = oldmat->donotstash;
3079: a->roworiented = oldmat->roworiented;
3080: a->rowindices = NULL;
3081: a->rowvalues = NULL;
3082: a->getrowactive = PETSC_FALSE;
3084: PetscLayoutReference(matin->rmap,&mat->rmap);
3085: PetscLayoutReference(matin->cmap,&mat->cmap);
3087: if (oldmat->colmap) {
3088: #if defined(PETSC_USE_CTABLE)
3089: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
3090: #else
3091: PetscMalloc1(mat->cmap->N,&a->colmap);
3092: PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));
3093: PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);
3094: #endif
3095: } else a->colmap = NULL;
3096: if (oldmat->garray) {
3097: PetscInt len;
3098: len = oldmat->B->cmap->n;
3099: PetscMalloc1(len+1,&a->garray);
3100: PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));
3101: if (len) { PetscArraycpy(a->garray,oldmat->garray,len); }
3102: } else a->garray = NULL;
3104: /* MatDuplicate() may be called with a non-assembled matrix;
3105: it only requires the matrix to be preallocated.
3106: This can happen, for example, inside DMCreateMatrix_Shell() */
3107: if (oldmat->lvec) {
3108: VecDuplicate(oldmat->lvec,&a->lvec);
3109: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);
3110: }
3111: if (oldmat->Mvctx) {
3112: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
3113: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);
3114: }
3115: if (oldmat->Mvctx_mpi1) {
3116: VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);
3117: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);
3118: }
3120: MatDuplicate(oldmat->A,cpvalues,&a->A);
3121: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
3122: MatDuplicate(oldmat->B,cpvalues,&a->B);
3123: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);
3124: PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
3125: *newmat = mat;
3126: return(0);
3127: }
3129: PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3130: {
3131: PetscBool isbinary, ishdf5;
3137: /* force binary viewer to load .info file if it has not yet done so */
3138: PetscViewerSetUp(viewer);
3139: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
3140: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);
3141: if (isbinary) {
3142: MatLoad_MPIAIJ_Binary(newMat,viewer);
3143: } else if (ishdf5) {
3144: #if defined(PETSC_HAVE_HDF5)
3145: MatLoad_AIJ_HDF5(newMat,viewer);
3146: #else
3147: SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3148: #endif
3149: } else {
3150: SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3151: }
3152: return(0);
3153: }
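/*
   Illustrative usage sketch for MatLoad() with a binary viewer (the file name "matrix.dat" is
   a placeholder).

     Mat         A;
     PetscViewer viewer;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATMPIAIJ);
     MatLoad(A,viewer);
     PetscViewerDestroy(&viewer);
*/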
3155: PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3156: {
3157: PetscInt header[4],M,N,m,nz,rows,cols,sum,i;
3158: PetscInt *rowidxs,*colidxs;
3159: PetscScalar *matvals;
3163: PetscViewerSetUp(viewer);
3165: /* read in matrix header */
3166: PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);
3167: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3168: M = header[1]; N = header[2]; nz = header[3];
3169: if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3170: if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3171: if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3173: /* set block sizes from the viewer's .info file */
3174: MatLoad_Binary_BlockSizes(mat,viewer);
3175: /* set global sizes if not set already */
3176: if (mat->rmap->N < 0) mat->rmap->N = M;
3177: if (mat->cmap->N < 0) mat->cmap->N = N;
3178: PetscLayoutSetUp(mat->rmap);
3179: PetscLayoutSetUp(mat->cmap);
3181: /* check if the matrix sizes are correct */
3182: MatGetSize(mat,&rows,&cols);
3183: if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3185: /* read in row lengths and build row indices */
3186: MatGetLocalSize(mat,&m,NULL);
3187: PetscMalloc1(m+1,&rowidxs);
3188: PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);
3189: rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3190: MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));
3191: if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3192: /* read in column indices and matrix values */
3193: PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);
3194: PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);
3195: PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);
3196: /* store matrix indices and values */
3197: MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);
3198: PetscFree(rowidxs);
3199: PetscFree2(colidxs,matvals);
3200: return(0);
3201: }
3203: /* Not scalable because of ISAllGather() unless getting all columns. */
3204: PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3205: {
3207: IS iscol_local;
3208: PetscBool isstride;
3209: PetscMPIInt lisstride=0,gisstride;
3212: /* check if we are grabbing all columns*/
3213: PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);
3215: if (isstride) {
3216: PetscInt start,len,mstart,mlen;
3217: ISStrideGetInfo(iscol,&start,NULL);
3218: ISGetLocalSize(iscol,&len);
3219: MatGetOwnershipRangeColumn(mat,&mstart,&mlen);
3220: if (mstart == start && mlen-mstart == len) lisstride = 1;
3221: }
3223: MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));
3224: if (gisstride) {
3225: PetscInt N;
3226: MatGetSize(mat,NULL,&N);
3227: ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);
3228: ISSetIdentity(iscol_local);
3229: PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");
3230: } else {
3231: PetscInt cbs;
3232: ISGetBlockSize(iscol,&cbs);
3233: ISAllGather(iscol,&iscol_local);
3234: ISSetBlockSize(iscol_local,cbs);
3235: }
3237: *isseq = iscol_local;
3238: return(0);
3239: }
3241: /*
3242: Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3243: (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3245: Input Parameters:
3246: mat - matrix
3247: isrow - parallel row index set; its local indices are a subset of local rows of mat,
3248: i.e., mat->rstart <= isrow[i] < mat->rend
3249: iscol - parallel column index set; its local indices are a subset of local columns of mat,
3250: i.e., mat->cstart <= iscol[i] < mat->cend
3251: Output Parameter:
3252: isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3253: iscol_o - sequential column index set for retrieving mat->B
3254: garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3255: */
3256: PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3257: {
3259: Vec x,cmap;
3260: const PetscInt *is_idx;
3261: PetscScalar *xarray,*cmaparray;
3262: PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count;
3263: Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data;
3264: Mat B=a->B;
3265: Vec lvec=a->lvec,lcmap;
3266: PetscInt i,cstart,cend,Bn=B->cmap->N;
3267: MPI_Comm comm;
3268: VecScatter Mvctx=a->Mvctx;
3271: PetscObjectGetComm((PetscObject)mat,&comm);
3272: ISGetLocalSize(iscol,&ncols);
3274: /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3275: MatCreateVecs(mat,&x,NULL);
3276: VecSet(x,-1.0);
3277: VecDuplicate(x,&cmap);
3278: VecSet(cmap,-1.0);
3280: /* Get start indices */
3281: MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);
3282: isstart -= ncols;
3283: MatGetOwnershipRangeColumn(mat,&cstart,&cend);
3285: ISGetIndices(iscol,&is_idx);
3286: VecGetArray(x,&xarray);
3287: VecGetArray(cmap,&cmaparray);
3288: PetscMalloc1(ncols,&idx);
3289: for (i=0; i<ncols; i++) {
3290: xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i];
3291: cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */
3292: idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */
3293: }
3294: VecRestoreArray(x,&xarray);
3295: VecRestoreArray(cmap,&cmaparray);
3296: ISRestoreIndices(iscol,&is_idx);
3298: /* Get iscol_d */
3299: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);
3300: ISGetBlockSize(iscol,&i);
3301: ISSetBlockSize(*iscol_d,i);
3303: /* Get isrow_d */
3304: ISGetLocalSize(isrow,&m);
3305: rstart = mat->rmap->rstart;
3306: PetscMalloc1(m,&idx);
3307: ISGetIndices(isrow,&is_idx);
3308: for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3309: ISRestoreIndices(isrow,&is_idx);
3311: ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);
3312: ISGetBlockSize(isrow,&i);
3313: ISSetBlockSize(*isrow_d,i);
3315: /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3316: VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);
3317: VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);
3319: VecDuplicate(lvec,&lcmap);
3321: VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);
3322: VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);
3324: /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3325: /* off-process column indices */
3326: count = 0;
3327: PetscMalloc1(Bn,&idx);
3328: PetscMalloc1(Bn,&cmap1);
3330: VecGetArray(lvec,&xarray);
3331: VecGetArray(lcmap,&cmaparray);
3332: for (i=0; i<Bn; i++) {
3333: if (PetscRealPart(xarray[i]) > -1.0) {
3334: idx[count] = i; /* local column index in off-diagonal part B */
3335: cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3336: count++;
3337: }
3338: }
3339: VecRestoreArray(lvec,&xarray);
3340: VecRestoreArray(lcmap,&cmaparray);
3342: ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);
3343: /* cannot ensure iscol_o has same blocksize as iscol! */
3345: PetscFree(idx);
3346: *garray = cmap1;
3348: VecDestroy(&x);
3349: VecDestroy(&cmap);
3350: VecDestroy(&lcmap);
3351: return(0);
3352: }
3354: /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3355: PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3356: {
3358: Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub;
3359: Mat M = NULL;
3360: MPI_Comm comm;
3361: IS iscol_d,isrow_d,iscol_o;
3362: Mat Asub = NULL,Bsub = NULL;
3363: PetscInt n;
3366: PetscObjectGetComm((PetscObject)mat,&comm);
3368: if (call == MAT_REUSE_MATRIX) {
3369: /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3370: PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);
3371: if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3373: PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);
3374: if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3376: PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);
3377: if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3379: /* Update diagonal and off-diagonal portions of submat */
3380: asub = (Mat_MPIAIJ*)(*submat)->data;
3381: MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);
3382: ISGetLocalSize(iscol_o,&n);
3383: if (n) {
3384: MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);
3385: }
3386: MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);
3387: MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);
3389:   } else { /* call == MAT_INITIAL_MATRIX */
3390: const PetscInt *garray;
3391: PetscInt BsubN;
3393: /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3394: ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);
3396: /* Create local submatrices Asub and Bsub */
3397: MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);
3398: MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);
3400: /* Create submatrix M */
3401: MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);
3403: /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3404: asub = (Mat_MPIAIJ*)M->data;
3406: ISGetLocalSize(iscol_o,&BsubN);
3407: n = asub->B->cmap->N;
3408: if (BsubN > n) {
3409: /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3410: const PetscInt *idx;
3411: PetscInt i,j,*idx_new,*subgarray = asub->garray;
3412: PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);
3414: PetscMalloc1(n,&idx_new);
3415: j = 0;
3416: ISGetIndices(iscol_o,&idx);
3417: for (i=0; i<n; i++) {
3418: if (j >= BsubN) break;
3419: while (subgarray[i] > garray[j]) j++;
3421: if (subgarray[i] == garray[j]) {
3422: idx_new[i] = idx[j++];
3423:       } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3424: }
3425: ISRestoreIndices(iscol_o,&idx);
3427: ISDestroy(&iscol_o);
3428: ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);
3430: } else if (BsubN < n) {
3431:       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than columns of B %D",BsubN,asub->B->cmap->N);
3432: }
3434: PetscFree(garray);
3435: *submat = M;
3437: /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3438: PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);
3439: ISDestroy(&isrow_d);
3441: PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);
3442: ISDestroy(&iscol_d);
3444: PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);
3445: ISDestroy(&iscol_o);
3446: }
3447: return(0);
3448: }
3450: PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3451: {
3453: IS iscol_local=NULL,isrow_d;
3454: PetscInt csize;
3455: PetscInt n,i,j,start,end;
3456: PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3457: MPI_Comm comm;
3460: /* If isrow has same processor distribution as mat,
3461: call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3462: if (call == MAT_REUSE_MATRIX) {
3463: PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);
3464: if (isrow_d) {
3465: sameRowDist = PETSC_TRUE;
3466: tsameDist[1] = PETSC_TRUE; /* sameColDist */
3467: } else {
3468: PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);
3469: if (iscol_local) {
3470: sameRowDist = PETSC_TRUE;
3471: tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3472: }
3473: }
3474: } else {
3475: /* Check if isrow has same processor distribution as mat */
3476: sameDist[0] = PETSC_FALSE;
3477: ISGetLocalSize(isrow,&n);
3478: if (!n) {
3479: sameDist[0] = PETSC_TRUE;
3480: } else {
3481: ISGetMinMax(isrow,&i,&j);
3482: MatGetOwnershipRange(mat,&start,&end);
3483: if (i >= start && j < end) {
3484: sameDist[0] = PETSC_TRUE;
3485: }
3486: }
3488: /* Check if iscol has same processor distribution as mat */
3489: sameDist[1] = PETSC_FALSE;
3490: ISGetLocalSize(iscol,&n);
3491: if (!n) {
3492: sameDist[1] = PETSC_TRUE;
3493: } else {
3494: ISGetMinMax(iscol,&i,&j);
3495: MatGetOwnershipRangeColumn(mat,&start,&end);
3496: if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3497: }
3499: PetscObjectGetComm((PetscObject)mat,&comm);
3500: MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);
3501: sameRowDist = tsameDist[0];
3502: }
3504: if (sameRowDist) {
3505: if (tsameDist[1]) { /* sameRowDist & sameColDist */
3506: /* isrow and iscol have same processor distribution as mat */
3507: MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);
3508: return(0);
3509: } else { /* sameRowDist */
3510: /* isrow has same processor distribution as mat */
3511: if (call == MAT_INITIAL_MATRIX) {
3512: PetscBool sorted;
3513: ISGetSeqIS_Private(mat,iscol,&iscol_local);
3514: ISGetLocalSize(iscol_local,&n); /* local size of iscol_local = global columns of newmat */
3515: ISGetSize(iscol,&i);
3516:       if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3518: ISSorted(iscol_local,&sorted);
3519: if (sorted) {
3520:         /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it can have duplicate indices */
3521: MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);
3522: return(0);
3523: }
3524: } else { /* call == MAT_REUSE_MATRIX */
3525: IS iscol_sub;
3526: PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);
3527: if (iscol_sub) {
3528: MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);
3529: return(0);
3530: }
3531: }
3532: }
3533: }
3535: /* General case: iscol -> iscol_local which has global size of iscol */
3536: if (call == MAT_REUSE_MATRIX) {
3537: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
3538: if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3539: } else {
3540: if (!iscol_local) {
3541: ISGetSeqIS_Private(mat,iscol,&iscol_local);
3542: }
3543: }
3545: ISGetLocalSize(iscol,&csize);
3546: MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);
3548: if (call == MAT_INITIAL_MATRIX) {
3549: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
3550: ISDestroy(&iscol_local);
3551: }
3552: return(0);
3553: }
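
/*
   Illustrative usage sketch (added for this listing; not part of the original PETSc source):
   calling the public MatCreateSubMatrix() interface with index sets that follow the row and
   column layout of A, so that the SameRowColDist branch above is taken.  The helper name
   ExtractOwnedBlock_Sketch and the choice of indices are assumptions made only for this
   example; error checking is omitted to match the stripped style of this listing.
*/
static PetscErrorCode ExtractOwnedBlock_Sketch(Mat A,Mat *sub)
{
  IS       isrow,iscol;
  PetscInt rstart,rend,cstart,cend;

  /* index sets consisting of the locally owned rows/columns have the same distribution as A */
  MatGetOwnershipRange(A,&rstart,&rend);
  MatGetOwnershipRangeColumn(A,&cstart,&cend);
  ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);
  ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&iscol);

  MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,sub);
  /* a later call with MAT_REUSE_MATRIX would reuse the index sets composed on *sub above */

  ISDestroy(&isrow);
  ISDestroy(&iscol);
  return(0);
}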
3555: /*@C
3556: MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3557: and "off-diagonal" part of the matrix in CSR format.
3559: Collective
3561: Input Parameters:
3562: + comm - MPI communicator
3563: . A - "diagonal" portion of matrix
3564: . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3565: - garray - global index of B columns
3567: Output Parameter:
3568: . mat - the matrix, with input A as its local diagonal matrix
3569: Level: advanced
3571: Notes:
3572: See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3573: A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3575: .seealso: MatCreateMPIAIJWithSplitArrays()
3576: @*/
3577: PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3578: {
3580: Mat_MPIAIJ *maij;
3581: Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew;
3582: PetscInt *oi=b->i,*oj=b->j,i,nz,col;
3583: PetscScalar *oa=b->a;
3584: Mat Bnew;
3585: PetscInt m,n,N;
3588: MatCreate(comm,mat);
3589: MatGetSize(A,&m,&n);
3590: if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3591: if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3592: /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3593: /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3595: /* Get global columns of mat */
3596: MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);
3598: MatSetSizes(*mat,m,n,PETSC_DECIDE,N);
3599: MatSetType(*mat,MATMPIAIJ);
3600: MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);
3601: maij = (Mat_MPIAIJ*)(*mat)->data;
3603: (*mat)->preallocated = PETSC_TRUE;
3605: PetscLayoutSetUp((*mat)->rmap);
3606: PetscLayoutSetUp((*mat)->cmap);
3608: /* Set A as diagonal portion of *mat */
3609: maij->A = A;
3611: nz = oi[m];
3612: for (i=0; i<nz; i++) {
3613: col = oj[i];
3614: oj[i] = garray[col];
3615: }
3617: /* Set Bnew as off-diagonal portion of *mat */
3618: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);
3619: bnew = (Mat_SeqAIJ*)Bnew->data;
3620: bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3621: maij->B = Bnew;
3623:   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3625: b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3626: b->free_a = PETSC_FALSE;
3627: b->free_ij = PETSC_FALSE;
3628: MatDestroy(&B);
3630: bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3631: bnew->free_a = PETSC_TRUE;
3632: bnew->free_ij = PETSC_TRUE;
3634: /* condense columns of maij->B */
3635: MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
3636: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3637: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3638: MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);
3639: MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3640: return(0);
3641: }
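
/*
   Illustrative usage sketch (added for this listing; not part of the original PETSc source):
   assembling a parallel matrix from its sequential "diagonal" and "off-diagonal" pieces with
   MatCreateMPIAIJWithSeqAIJ().  The helper name BuildFromSeqPieces_Sketch, the two-rank layout
   and the numerical values are assumptions made only for this example; error checking is
   omitted to match the stripped style of this listing.
*/
static PetscErrorCode BuildFromSeqPieces_Sketch(MPI_Comm comm,Mat *C)
{
  Mat         A,B;
  PetscMPIInt rank;
  PetscInt    i,col0 = 0,garray[1];
  PetscScalar one = 1.0,half = 0.5;

  MPI_Comm_rank(comm,&rank);                 /* the sketch assumes exactly two ranks, each owning 2 rows and 2 columns */

  /* "diagonal" block: the local 2x2 identity */
  MatCreateSeqAIJ(PETSC_COMM_SELF,2,2,1,NULL,&A);
  for (i=0; i<2; i++) {
    MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);
  }
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  /* "off-diagonal" block: one compressed column coupling both local rows to a column owned
     by the other rank; garray[] maps that column to its global index */
  MatCreateSeqAIJ(PETSC_COMM_SELF,2,1,1,NULL,&B);
  for (i=0; i<2; i++) {
    MatSetValues(B,1,&i,1,&col0,&half,INSERT_VALUES);
  }
  MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
  garray[0] = rank ? 0 : 2;                  /* rank 0 couples to global column 2, rank 1 to global column 0 */

  /* A is kept as the diagonal part of *C; B is destroyed by the call and must not be reused */
  MatCreateMPIAIJWithSeqAIJ(comm,A,B,garray,C);
  return(0);
}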
3643: extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3645: PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3646: {
3648: PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs;
3649: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3650: Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data;
3651: Mat M,Msub,B=a->B;
3652: MatScalar *aa;
3653: Mat_SeqAIJ *aij;
3654: PetscInt *garray = a->garray,*colsub,Ncols;
3655: PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3656: IS iscol_sub,iscmap;
3657: const PetscInt *is_idx,*cmap;
3658: PetscBool allcolumns=PETSC_FALSE;
3659: MPI_Comm comm;
3662: PetscObjectGetComm((PetscObject)mat,&comm);
3664: if (call == MAT_REUSE_MATRIX) {
3665: PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);
3666: if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3667: ISGetLocalSize(iscol_sub,&count);
3669: PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);
3670: if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3672: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);
3673: if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3675: MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);
3677:   } else { /* call == MAT_INITIAL_MATRIX */
3678: PetscBool flg;
3680: ISGetLocalSize(iscol,&n);
3681: ISGetSize(iscol,&Ncols);
3683: /* (1) iscol -> nonscalable iscol_local */
3684: /* Check for special case: each processor gets entire matrix columns */
3685: ISIdentity(iscol_local,&flg);
3686: if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3687: MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));
3688: if (allcolumns) {
3689: iscol_sub = iscol_local;
3690: PetscObjectReference((PetscObject)iscol_local);
3691: ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);
3693: } else {
3694:       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it can have duplicate indices */
3695: PetscInt *idx,*cmap1,k;
3696: PetscMalloc1(Ncols,&idx);
3697: PetscMalloc1(Ncols,&cmap1);
3698: ISGetIndices(iscol_local,&is_idx);
3699: count = 0;
3700: k = 0;
3701: for (i=0; i<Ncols; i++) {
3702: j = is_idx[i];
3703: if (j >= cstart && j < cend) {
3704: /* diagonal part of mat */
3705: idx[count] = j;
3706: cmap1[count++] = i; /* column index in submat */
3707: } else if (Bn) {
3708: /* off-diagonal part of mat */
3709: if (j == garray[k]) {
3710: idx[count] = j;
3711: cmap1[count++] = i; /* column index in submat */
3712: } else if (j > garray[k]) {
3713: while (j > garray[k] && k < Bn-1) k++;
3714: if (j == garray[k]) {
3715: idx[count] = j;
3716: cmap1[count++] = i; /* column index in submat */
3717: }
3718: }
3719: }
3720: }
3721: ISRestoreIndices(iscol_local,&is_idx);
3723: ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);
3724: ISGetBlockSize(iscol,&cbs);
3725: ISSetBlockSize(iscol_sub,cbs);
3727: ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);
3728: }
3730: /* (3) Create sequential Msub */
3731: MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);
3732: }
3734: ISGetLocalSize(iscol_sub,&count);
3735: aij = (Mat_SeqAIJ*)(Msub)->data;
3736: ii = aij->i;
3737: ISGetIndices(iscmap,&cmap);
3739: /*
3740: m - number of local rows
3741: Ncols - number of columns (same on all processors)
3742: rstart - first row in new global matrix generated
3743: */
3744: MatGetSize(Msub,&m,NULL);
3746: if (call == MAT_INITIAL_MATRIX) {
3747: /* (4) Create parallel newmat */
3748: PetscMPIInt rank,size;
3749: PetscInt csize;
3751: MPI_Comm_size(comm,&size);
3752: MPI_Comm_rank(comm,&rank);
3754: /*
3755: Determine the number of non-zeros in the diagonal and off-diagonal
3756: portions of the matrix in order to do correct preallocation
3757: */
3759: /* first get start and end of "diagonal" columns */
3760: ISGetLocalSize(iscol,&csize);
3761: if (csize == PETSC_DECIDE) {
3762: ISGetSize(isrow,&mglobal);
3763: if (mglobal == Ncols) { /* square matrix */
3764: nlocal = m;
3765: } else {
3766: nlocal = Ncols/size + ((Ncols % size) > rank);
3767: }
3768: } else {
3769: nlocal = csize;
3770: }
3771: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
3772: rstart = rend - nlocal;
3773: if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3775: /* next, compute all the lengths */
3776: jj = aij->j;
3777: PetscMalloc1(2*m+1,&dlens);
3778: olens = dlens + m;
3779: for (i=0; i<m; i++) {
3780: jend = ii[i+1] - ii[i];
3781: olen = 0;
3782: dlen = 0;
3783: for (j=0; j<jend; j++) {
3784: if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3785: else dlen++;
3786: jj++;
3787: }
3788: olens[i] = olen;
3789: dlens[i] = dlen;
3790: }
3792: ISGetBlockSize(isrow,&bs);
3793: ISGetBlockSize(iscol,&cbs);
3795: MatCreate(comm,&M);
3796: MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);
3797: MatSetBlockSizes(M,bs,cbs);
3798: MatSetType(M,((PetscObject)mat)->type_name);
3799: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
3800: PetscFree(dlens);
3802: } else { /* call == MAT_REUSE_MATRIX */
3803: M = *newmat;
3804: MatGetLocalSize(M,&i,NULL);
3805: if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3806: MatZeroEntries(M);
3807: /*
3808: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3809: rather than the slower MatSetValues().
3810: */
3811: M->was_assembled = PETSC_TRUE;
3812: M->assembled = PETSC_FALSE;
3813: }
3815: /* (5) Set values of Msub to *newmat */
3816: PetscMalloc1(count,&colsub);
3817: MatGetOwnershipRange(M,&rstart,NULL);
3819: jj = aij->j;
3820: aa = aij->a;
3821: for (i=0; i<m; i++) {
3822: row = rstart + i;
3823: nz = ii[i+1] - ii[i];
3824: for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3825: MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);
3826: jj += nz; aa += nz;
3827: }
3828: ISRestoreIndices(iscmap,&cmap);
3830: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
3831: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
3833: PetscFree(colsub);
3835: /* save Msub, iscol_sub and iscmap used in processor for next request */
3836: if (call == MAT_INITIAL_MATRIX) {
3837: *newmat = M;
3838: PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);
3839: MatDestroy(&Msub);
3841: PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);
3842: ISDestroy(&iscol_sub);
3844: PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);
3845: ISDestroy(&iscmap);
3847: if (iscol_local) {
3848: PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);
3849: ISDestroy(&iscol_local);
3850: }
3851: }
3852: return(0);
3853: }
3855: /*
3856:     Not ideal, since it makes two copies of the submatrix: first a SeqAIJ on each process,
3857:   and then the end result by concatenating the local matrices.
3858: Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3860: Note: This requires a sequential iscol with all indices.
3861: */
3862: PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3863: {
3865: PetscMPIInt rank,size;
3866: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3867: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3868: Mat M,Mreuse;
3869: MatScalar *aa,*vwork;
3870: MPI_Comm comm;
3871: Mat_SeqAIJ *aij;
3872: PetscBool colflag,allcolumns=PETSC_FALSE;
3875: PetscObjectGetComm((PetscObject)mat,&comm);
3876: MPI_Comm_rank(comm,&rank);
3877: MPI_Comm_size(comm,&size);
3879: /* Check for special case: each processor gets entire matrix columns */
3880: ISIdentity(iscol,&colflag);
3881: ISGetLocalSize(iscol,&n);
3882: if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3883: MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));
3885: if (call == MAT_REUSE_MATRIX) {
3886: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);
3887: if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3888: MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);
3889: } else {
3890: MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);
3891: }
3893: /*
3894: m - number of local rows
3895: n - number of columns (same on all processors)
3896: rstart - first row in new global matrix generated
3897: */
3898: MatGetSize(Mreuse,&m,&n);
3899: MatGetBlockSizes(Mreuse,&bs,&cbs);
3900: if (call == MAT_INITIAL_MATRIX) {
3901: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3902: ii = aij->i;
3903: jj = aij->j;
3905: /*
3906: Determine the number of non-zeros in the diagonal and off-diagonal
3907: portions of the matrix in order to do correct preallocation
3908: */
3910: /* first get start and end of "diagonal" columns */
3911: if (csize == PETSC_DECIDE) {
3912: ISGetSize(isrow,&mglobal);
3913: if (mglobal == n) { /* square matrix */
3914: nlocal = m;
3915: } else {
3916: nlocal = n/size + ((n % size) > rank);
3917: }
3918: } else {
3919: nlocal = csize;
3920: }
3921: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
3922: rstart = rend - nlocal;
3923: if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3925: /* next, compute all the lengths */
3926: PetscMalloc1(2*m+1,&dlens);
3927: olens = dlens + m;
3928: for (i=0; i<m; i++) {
3929: jend = ii[i+1] - ii[i];
3930: olen = 0;
3931: dlen = 0;
3932: for (j=0; j<jend; j++) {
3933: if (*jj < rstart || *jj >= rend) olen++;
3934: else dlen++;
3935: jj++;
3936: }
3937: olens[i] = olen;
3938: dlens[i] = dlen;
3939: }
3940: MatCreate(comm,&M);
3941: MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);
3942: MatSetBlockSizes(M,bs,cbs);
3943: MatSetType(M,((PetscObject)mat)->type_name);
3944: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
3945: PetscFree(dlens);
3946: } else {
3947: PetscInt ml,nl;
3949: M = *newmat;
3950: MatGetLocalSize(M,&ml,&nl);
3951: if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3952: MatZeroEntries(M);
3953: /*
3954: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3955: rather than the slower MatSetValues().
3956: */
3957: M->was_assembled = PETSC_TRUE;
3958: M->assembled = PETSC_FALSE;
3959: }
3960: MatGetOwnershipRange(M,&rstart,&rend);
3961: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3962: ii = aij->i;
3963: jj = aij->j;
3964: aa = aij->a;
3965: for (i=0; i<m; i++) {
3966: row = rstart + i;
3967: nz = ii[i+1] - ii[i];
3968: cwork = jj; jj += nz;
3969: vwork = aa; aa += nz;
3970: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
3971: }
3973: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
3974: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
3975: *newmat = M;
3977: /* save submatrix used in processor for next request */
3978: if (call == MAT_INITIAL_MATRIX) {
3979: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
3980: MatDestroy(&Mreuse);
3981: }
3982: return(0);
3983: }
3985: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3986: {
3987: PetscInt m,cstart, cend,j,nnz,i,d;
3988: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3989: const PetscInt *JJ;
3991: PetscBool nooffprocentries;
3994: if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3996: PetscLayoutSetUp(B->rmap);
3997: PetscLayoutSetUp(B->cmap);
3998: m = B->rmap->n;
3999: cstart = B->cmap->rstart;
4000: cend = B->cmap->rend;
4001: rstart = B->rmap->rstart;
4003: PetscCalloc2(m,&d_nnz,m,&o_nnz);
4005: if (PetscDefined(USE_DEBUG)) {
4006: for (i=0; i<m; i++) {
4007: nnz = Ii[i+1]- Ii[i];
4008: JJ = J + Ii[i];
4009: if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
4010: if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
4011: if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
4012: }
4013: }
4015: for (i=0; i<m; i++) {
4016: nnz = Ii[i+1]- Ii[i];
4017: JJ = J + Ii[i];
4018: nnz_max = PetscMax(nnz_max,nnz);
4019: d = 0;
4020: for (j=0; j<nnz; j++) {
4021: if (cstart <= JJ[j] && JJ[j] < cend) d++;
4022: }
4023: d_nnz[i] = d;
4024: o_nnz[i] = nnz - d;
4025: }
4026: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
4027: PetscFree2(d_nnz,o_nnz);
4029: for (i=0; i<m; i++) {
4030: ii = i + rstart;
4031: MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);
4032: }
4033: nooffprocentries = B->nooffprocentries;
4034: B->nooffprocentries = PETSC_TRUE;
4035: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
4036: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
4037: B->nooffprocentries = nooffprocentries;
4039: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
4040: return(0);
4041: }
4043: /*@
4044: MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4045: (the default parallel PETSc format).
4047: Collective
4049: Input Parameters:
4050: + B - the matrix
4051: . i - the indices into j for the start of each local row (starts with zero)
4052: . j - the column indices for each local row (starts with zero)
4053: - v - optional values in the matrix
4055: Level: developer
4057: Notes:
4058: The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4059: thus you CANNOT change the matrix entries by changing the values of v[] after you have
4060: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4062:    The i and j indices are 0 based, and the entries of i are offsets into the local j array.
4064:    The format used for the sparse matrix input is equivalent to a
4065:    row-major ordering, i.e., for the following matrix, the input data expected is
4066:    as shown
4068: $ 1 0 0
4069: $ 2 0 3 P0
4070: $ -------
4071: $ 4 5 6 P1
4072: $
4073: $ Process0 [P0]: rows_owned=[0,1]
4074: $ i = {0,1,3} [size = nrow+1 = 2+1]
4075: $ j = {0,0,2} [size = 3]
4076: $ v = {1,2,3} [size = 3]
4077: $
4078: $ Process1 [P1]: rows_owned=[2]
4079: $ i = {0,3} [size = nrow+1 = 1+1]
4080: $ j = {0,1,2} [size = 3]
4081: $ v = {4,5,6} [size = 3]
4083: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4084: MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4085: @*/
4086: PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4087: {
4091: PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
4092: return(0);
4093: }
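
/*
   Illustrative usage sketch (added for this listing; not part of the original PETSc source):
   feeding the two-process CSR data from the manual page above into
   MatMPIAIJSetPreallocationCSR().  The helper name PreallocateCSR_Sketch is an assumption
   made only for this example, which assumes exactly two MPI ranks; error checking is omitted
   to match the stripped style of this listing.
*/
static PetscErrorCode PreallocateCSR_Sketch(MPI_Comm comm,Mat *B)
{
  PetscMPIInt       rank;
  /* process 0 owns rows 0-1, process 1 owns row 2 of the 3x3 example matrix above */
  const PetscInt    i0[] = {0,1,3},    i1[] = {0,3};
  const PetscInt    j0[] = {0,0,2},    j1[] = {0,1,2};
  const PetscScalar v0[] = {1,2,3},    v1[] = {4,5,6};

  MPI_Comm_rank(comm,&rank);
  MatCreate(comm,B);
  MatSetSizes(*B,rank ? 1 : 2,PETSC_DECIDE,3,3);
  MatSetType(*B,MATMPIAIJ);
  /* copies i/j/v into PETSc's internal storage, preallocates and assembles the matrix */
  MatMPIAIJSetPreallocationCSR(*B,rank ? i1 : i0,rank ? j1 : j0,rank ? v1 : v0);
  return(0);
}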
4095: /*@C
4096: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4097: (the default parallel PETSc format). For good matrix assembly performance
4098: the user should preallocate the matrix storage by setting the parameters
4099: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4100: performance can be increased by more than a factor of 50.
4102: Collective
4104: Input Parameters:
4105: + B - the matrix
4106: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4107: (same value is used for all local rows)
4108: . d_nnz - array containing the number of nonzeros in the various rows of the
4109: DIAGONAL portion of the local submatrix (possibly different for each row)
4110: or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4111: The size of this array is equal to the number of local rows, i.e 'm'.
4112: For matrices that will be factored, you must leave room for (and set)
4113: the diagonal entry even if it is zero.
4114: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4115: submatrix (same value is used for all local rows).
4116: - o_nnz - array containing the number of nonzeros in the various rows of the
4117: OFF-DIAGONAL portion of the local submatrix (possibly different for
4118: each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4119: structure. The size of this array is equal to the number
4120: of local rows, i.e 'm'.
4122: If the *_nnz parameter is given then the *_nz parameter is ignored
4124: The AIJ format (also called the Yale sparse matrix format or
4125: compressed row storage (CSR)), is fully compatible with standard Fortran 77
4126: storage. The stored row and column indices begin with zero.
4127: See Users-Manual: ch_mat for details.
4129: The parallel matrix is partitioned such that the first m0 rows belong to
4130: process 0, the next m1 rows belong to process 1, the next m2 rows belong
4131: to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4133: The DIAGONAL portion of the local submatrix of a processor can be defined
4134:    as the submatrix which is obtained by extracting the part corresponding to
4135:    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4136:    first row that belongs to the processor, r2 is the last row belonging to
4137:    this processor, and c1-c2 is the range of indices of the local part of a
4138:    vector suitable for applying the matrix to. This is an m x n matrix. In the
4139:    common case of a square matrix, the row and column ranges are the same and
4140:    the DIAGONAL part is also square. The remaining portion of the local
4141:    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4143: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4145: You can call MatGetInfo() to get information on how effective the preallocation was;
4146: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4147: You can also run with the option -info and look for messages with the string
4148: malloc in them to see if additional memory allocation was needed.
4150: Example usage:
4152: Consider the following 8x8 matrix with 34 non-zero values, that is
4153:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4154: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4155: as follows:
4157: .vb
4158: 1 2 0 | 0 3 0 | 0 4
4159: Proc0 0 5 6 | 7 0 0 | 8 0
4160: 9 0 10 | 11 0 0 | 12 0
4161: -------------------------------------
4162: 13 0 14 | 15 16 17 | 0 0
4163: Proc1 0 18 0 | 19 20 21 | 0 0
4164: 0 0 0 | 22 23 0 | 24 0
4165: -------------------------------------
4166: Proc2 25 26 27 | 0 0 28 | 29 0
4167: 30 0 0 | 31 32 33 | 0 34
4168: .ve
4170: This can be represented as a collection of submatrices as:
4172: .vb
4173: A B C
4174: D E F
4175: G H I
4176: .ve
4178: Where the submatrices A,B,C are owned by proc0, D,E,F are
4179: owned by proc1, G,H,I are owned by proc2.
4181: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4182: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4183: The 'M','N' parameters are 8,8, and have the same values on all procs.
4185: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4186: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4187: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4188: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4189:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4190:    matrix, and [DF] as another SeqAIJ matrix.
4192: When d_nz, o_nz parameters are specified, d_nz storage elements are
4193: allocated for every row of the local diagonal submatrix, and o_nz
4194: storage locations are allocated for every row of the OFF-DIAGONAL submat.
4195:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4196:    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4197: In this case, the values of d_nz,o_nz are:
4198: .vb
4199: proc0 : dnz = 2, o_nz = 2
4200: proc1 : dnz = 3, o_nz = 2
4201: proc2 : dnz = 1, o_nz = 4
4202: .ve
4203: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4204: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4205:    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4206: 34 values.
4208: When d_nnz, o_nnz parameters are specified, the storage is specified
4209:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4210: In the above case the values for d_nnz,o_nnz are:
4211: .vb
4212: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4213: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4214: proc2: d_nnz = [1,1] and o_nnz = [4,4]
4215: .ve
4216:    Here the space allocated is the sum of all the above values, i.e., 34, and
4217: hence pre-allocation is perfect.
4219: Level: intermediate
4221: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4222: MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4223: @*/
4224: PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4225: {
4231: PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
4232: return(0);
4233: }
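
/*
   Illustrative usage sketch (added for this listing; not part of the original PETSc source):
   exact per-row preallocation of the 8x8 example matrix above using d_nnz/o_nnz.  The helper
   name PreallocateExact_Sketch is an assumption made only for this example, which assumes
   exactly three MPI ranks; error checking is omitted to match the stripped style of this
   listing.
*/
static PetscErrorCode PreallocateExact_Sketch(MPI_Comm comm,Mat *A)
{
  PetscMPIInt    rank;
  /* per-row nonzero counts taken from the example above */
  const PetscInt d0[] = {2,2,2}, o0[] = {2,2,2};   /* proc0: rows 0-2 */
  const PetscInt d1[] = {3,3,2}, o1[] = {2,1,1};   /* proc1: rows 3-5 */
  const PetscInt d2[] = {1,1},   o2[] = {4,4};     /* proc2: rows 6-7 */
  const PetscInt *d_nnz,*o_nnz;
  PetscInt       m;

  MPI_Comm_rank(comm,&rank);
  if (rank == 0)      {m = 3; d_nnz = d0; o_nnz = o0;}
  else if (rank == 1) {m = 3; d_nnz = d1; o_nnz = o1;}
  else                {m = 2; d_nnz = d2; o_nnz = o2;}

  MatCreate(comm,A);
  MatSetSizes(*A,m,m,8,8);                   /* local rows and columns 3,3,2 as in the example */
  MatSetType(*A,MATMPIAIJ);
  /* with d_nnz/o_nnz given, the scalar d_nz/o_nz arguments (here 0) are ignored */
  MatMPIAIJSetPreallocation(*A,0,d_nnz,0,o_nnz);
  return(0);
}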
4235: /*@
4236:    MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4237:    in standard CSR format.
4239: Collective
4241: Input Parameters:
4242: + comm - MPI communicator
4243: . m - number of local rows (Cannot be PETSC_DECIDE)
4244: . n - This value should be the same as the local size used in creating the
4245: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4246: calculated if N is given) For square matrices n is almost always m.
4247: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4248: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4249: . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4250: . j - column indices
4251: - a - matrix values
4253: Output Parameter:
4254: . mat - the matrix
4256: Level: intermediate
4258: Notes:
4259: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4260: thus you CANNOT change the matrix entries by changing the values of a[] after you have
4261: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4263:    The i and j indices are 0 based, and the entries of i are offsets into the local j array.
4265:    The format used for the sparse matrix input is equivalent to a
4266:    row-major ordering, i.e., for the following matrix, the input data expected is
4267:    as shown
4269: Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4271: $ 1 0 0
4272: $ 2 0 3 P0
4273: $ -------
4274: $ 4 5 6 P1
4275: $
4276: $ Process0 [P0]: rows_owned=[0,1]
4277: $ i = {0,1,3} [size = nrow+1 = 2+1]
4278: $ j = {0,0,2} [size = 3]
4279: $ v = {1,2,3} [size = 3]
4280: $
4281: $ Process1 [P1]: rows_owned=[2]
4282: $ i = {0,3} [size = nrow+1 = 1+1]
4283: $ j = {0,1,2} [size = 3]
4284: $ v = {4,5,6} [size = 3]
4286: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4287: MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4288: @*/
4289: PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4290: {
4294: if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4295: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4296: MatCreate(comm,mat);
4297: MatSetSizes(*mat,m,n,M,N);
4298: /* MatSetBlockSizes(M,bs,cbs); */
4299: MatSetType(*mat,MATMPIAIJ);
4300: MatMPIAIJSetPreallocationCSR(*mat,i,j,a);
4301: return(0);
4302: }
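
/*
   Illustrative usage sketch (added for this listing; not part of the original PETSc source):
   creating the 3x3 two-process example matrix above directly from CSR arrays.  The helper
   name CreateFromCSR_Sketch is an assumption made only for this example, which assumes
   exactly two MPI ranks; error checking is omitted to match the stripped style of this
   listing.
*/
static PetscErrorCode CreateFromCSR_Sketch(MPI_Comm comm,Mat *A)
{
  PetscMPIInt       rank;
  /* the same two-process CSR data as in the manual page above */
  const PetscInt    i0[] = {0,1,3},    i1[] = {0,3};
  const PetscInt    j0[] = {0,0,2},    j1[] = {0,1,2};
  const PetscScalar v0[] = {1,2,3},    v1[] = {4,5,6};

  MPI_Comm_rank(comm,&rank);
  /* the arrays are copied, so stack data like the arrays above may be used and then discarded */
  MatCreateMPIAIJWithArrays(comm,rank ? 1 : 2,PETSC_DECIDE,3,3,
                            rank ? i1 : i0,rank ? j1 : j0,rank ? v1 : v0,A);
  return(0);
}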
4304: /*@
4305:    MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
4306:    in standard CSR format. Only the numerical values are updated; the other arrays must be identical
4308: Collective
4310: Input Parameters:
4311: + mat - the matrix
4312: . m - number of local rows (Cannot be PETSC_DECIDE)
4313: . n - This value should be the same as the local size used in creating the
4314: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4315: calculated if N is given) For square matrices n is almost always m.
4316: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4317: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4318: . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4319: . J - column indices
4320: - v - matrix values
4322: Level: intermediate
4324: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4325: MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4326: @*/
4327: PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4328: {
4330: PetscInt cstart,nnz,i,j;
4331: PetscInt *ld;
4332: PetscBool nooffprocentries;
4333: Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data;
4334: Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data;
4335: PetscScalar *ad = Ad->a, *ao = Ao->a;
4336: const PetscInt *Adi = Ad->i;
4337: PetscInt ldi,Iii,md;
4340: if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4341: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4342: if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4343: if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4345: cstart = mat->cmap->rstart;
4346: if (!Aij->ld) {
4347: /* count number of entries below block diagonal */
4348: PetscCalloc1(m,&ld);
4349: Aij->ld = ld;
4350: for (i=0; i<m; i++) {
4351: nnz = Ii[i+1]- Ii[i];
4352: j = 0;
4353:       while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz before dereferencing J[j] */
4354: J += nnz;
4355: ld[i] = j;
4356: }
4357: } else {
4358: ld = Aij->ld;
4359: }
4361: for (i=0; i<m; i++) {
4362: nnz = Ii[i+1]- Ii[i];
4363: Iii = Ii[i];
4364: ldi = ld[i];
4365: md = Adi[i+1]-Adi[i];
4366: PetscArraycpy(ao,v + Iii,ldi);
4367: PetscArraycpy(ad,v + Iii + ldi,md);
4368: PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);
4369: ad += md;
4370: ao += nnz - md;
4371: }
4372: nooffprocentries = mat->nooffprocentries;
4373: mat->nooffprocentries = PETSC_TRUE;
4374: PetscObjectStateIncrease((PetscObject)Aij->A);
4375: PetscObjectStateIncrease((PetscObject)Aij->B);
4376: PetscObjectStateIncrease((PetscObject)mat);
4377: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
4378: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
4379: mat->nooffprocentries = nooffprocentries;
4380: return(0);
4381: }
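
/*
   Illustrative usage sketch (added for this listing; not part of the original PETSc source):
   refreshing only the numerical values of a matrix built from CSR arrays, e.g. by the
   CreateFromCSR_Sketch() helper above; the row/column structure passed here must match the
   one used at creation time exactly.  The helper name UpdateCSRValues_Sketch is an assumption
   made only for this example, which assumes exactly two MPI ranks; error checking is omitted
   to match the stripped style of this listing.
*/
static PetscErrorCode UpdateCSRValues_Sketch(Mat A)
{
  PetscMPIInt       rank;
  PetscInt          m,n;
  const PetscInt    i0[] = {0,1,3},    i1[] = {0,3};
  const PetscInt    j0[] = {0,0,2},    j1[] = {0,1,2};
  const PetscScalar w0[] = {10,20,30}, w1[] = {40,50,60};   /* new values, same sparsity pattern */

  MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
  MatGetLocalSize(A,&m,&n);                  /* local sizes must match those of the existing matrix */
  MatUpdateMPIAIJWithArrays(A,m,n,3,3,rank ? i1 : i0,rank ? j1 : j0,rank ? w1 : w0);
  return(0);
}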
4383: /*@C
4384: MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4385: (the default parallel PETSc format). For good matrix assembly performance
4386: the user should preallocate the matrix storage by setting the parameters
4387: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4388: performance can be increased by more than a factor of 50.
4390: Collective
4392: Input Parameters:
4393: + comm - MPI communicator
4394: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4395: This value should be the same as the local size used in creating the
4396: y vector for the matrix-vector product y = Ax.
4397: . n - This value should be the same as the local size used in creating the
4398: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4399: calculated if N is given) For square matrices n is almost always m.
4400: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4401: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4402: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4403: (same value is used for all local rows)
4404: . d_nnz - array containing the number of nonzeros in the various rows of the
4405: DIAGONAL portion of the local submatrix (possibly different for each row)
4406: or NULL, if d_nz is used to specify the nonzero structure.
4407: The size of this array is equal to the number of local rows, i.e 'm'.
4408: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4409: submatrix (same value is used for all local rows).
4410: - o_nnz - array containing the number of nonzeros in the various rows of the
4411: OFF-DIAGONAL portion of the local submatrix (possibly different for
4412: each row) or NULL, if o_nz is used to specify the nonzero
4413: structure. The size of this array is equal to the number
4414: of local rows, i.e 'm'.
4416: Output Parameter:
4417: . A - the matrix
4419: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4420: MatXXXXSetPreallocation() paradigm instead of this routine directly.
4421: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4423: Notes:
4424: If the *_nnz parameter is given then the *_nz parameter is ignored
4426: m,n,M,N parameters specify the size of the matrix, and its partitioning across
4427: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4428: storage requirements for this matrix.
4430: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4431:    processor then it must be used on all processors that share the object for
4432: that argument.
4434: The user MUST specify either the local or global matrix dimensions
4435: (possibly both).
4437: The parallel matrix is partitioned across processors such that the
4438: first m0 rows belong to process 0, the next m1 rows belong to
4439: process 1, the next m2 rows belong to process 2 etc.. where
4440:    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4441:    values corresponding to an [m x N] submatrix.
4443: The columns are logically partitioned with the n0 columns belonging
4444: to 0th partition, the next n1 columns belonging to the next
4445: partition etc.. where n0,n1,n2... are the input parameter 'n'.
4447: The DIAGONAL portion of the local submatrix on any given processor
4448: is the submatrix corresponding to the rows and columns m,n
4449:    corresponding to the given processor, i.e., the diagonal matrix on
4450: process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4451: etc. The remaining portion of the local submatrix [m x (N-n)]
4452:    constitutes the OFF-DIAGONAL portion. The example below better
4453: illustrates this concept.
4455: For a square global matrix we define each processor's diagonal portion
4456: to be its local rows and the corresponding columns (a square submatrix);
4457: each processor's off-diagonal portion encompasses the remainder of the
4458: local matrix (a rectangular submatrix).
4460: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4462: When calling this routine with a single process communicator, a matrix of
4463: type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4464: type of communicator, use the construction mechanism
4465: .vb
4466: MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4467: .ve
4469: $ MatCreate(...,&A);
4470: $ MatSetType(A,MATMPIAIJ);
4471: $ MatSetSizes(A, m,n,M,N);
4472: $ MatMPIAIJSetPreallocation(A,...);
4474: By default, this format uses inodes (identical nodes) when possible.
4475: We search for consecutive rows with the same nonzero structure, thereby
4476: reusing matrix information to achieve increased efficiency.
4478: Options Database Keys:
4479: + -mat_no_inode - Do not use inodes
4480: - -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4484: Example usage:
4486: Consider the following 8x8 matrix with 34 non-zero values, that is
4487:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4488: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4489: as follows
4491: .vb
4492: 1 2 0 | 0 3 0 | 0 4
4493: Proc0 0 5 6 | 7 0 0 | 8 0
4494: 9 0 10 | 11 0 0 | 12 0
4495: -------------------------------------
4496: 13 0 14 | 15 16 17 | 0 0
4497: Proc1 0 18 0 | 19 20 21 | 0 0
4498: 0 0 0 | 22 23 0 | 24 0
4499: -------------------------------------
4500: Proc2 25 26 27 | 0 0 28 | 29 0
4501: 30 0 0 | 31 32 33 | 0 34
4502: .ve
4504: This can be represented as a collection of submatrices as
4506: .vb
4507: A B C
4508: D E F
4509: G H I
4510: .ve
4512: Where the submatrices A,B,C are owned by proc0, D,E,F are
4513: owned by proc1, G,H,I are owned by proc2.
4515: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4516: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4517: The 'M','N' parameters are 8,8, and have the same values on all procs.
4519: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4520: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4521: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4522: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4523:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4524:    matrix, and [DF] as another SeqAIJ matrix.
4526: When d_nz, o_nz parameters are specified, d_nz storage elements are
4527: allocated for every row of the local diagonal submatrix, and o_nz
4528: storage locations are allocated for every row of the OFF-DIAGONAL submat.
4529:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4530:    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4531: In this case, the values of d_nz,o_nz are
4532: .vb
4533: proc0 : dnz = 2, o_nz = 2
4534: proc1 : dnz = 3, o_nz = 2
4535: proc2 : dnz = 1, o_nz = 4
4536: .ve
4537: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4538: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4539:    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4540: 34 values.
4542: When d_nnz, o_nnz parameters are specified, the storage is specified
4543:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4544: In the above case the values for d_nnz,o_nnz are
4545: .vb
4546: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4547: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4548: proc2: d_nnz = [1,1] and o_nnz = [4,4]
4549: .ve
4550:    Here the space allocated is the sum of all the above values, i.e., 34, and
4551: hence pre-allocation is perfect.
4553: Level: intermediate
4555: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4556: MATMPIAIJ, MatCreateMPIAIJWithArrays()
4557: @*/
4558: PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4559: {
4561: PetscMPIInt size;
4564: MatCreate(comm,A);
4565: MatSetSizes(*A,m,n,M,N);
4566: MPI_Comm_size(comm,&size);
4567: if (size > 1) {
4568: MatSetType(*A,MATMPIAIJ);
4569: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
4570: } else {
4571: MatSetType(*A,MATSEQAIJ);
4572: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
4573: }
4574: return(0);
4575: }
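
/*
   Illustrative usage sketch (added for this listing; not part of the original PETSc source):
   creating and filling a distributed tridiagonal matrix with MatCreateAIJ(), using rough
   upper bounds for the preallocation.  The helper name CreateTridiagonal_Sketch is an
   assumption made only for this example; error checking is omitted to match the stripped
   style of this listing.
*/
static PetscErrorCode CreateTridiagonal_Sketch(MPI_Comm comm,PetscInt N,Mat *A)
{
  PetscInt    i,rstart,rend,cols[3];
  PetscScalar vals[3] = {-1.0,2.0,-1.0};

  /* at most 3 nonzeros per row land in the diagonal block and at most 2 in the off-diagonal block */
  MatCreateAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,N,N,3,NULL,2,NULL,A);

  MatGetOwnershipRange(*A,&rstart,&rend);
  for (i=rstart; i<rend; i++) {
    cols[0] = i-1; cols[1] = i; cols[2] = i+1;
    if (i == 0)        {MatSetValues(*A,1,&i,2,cols+1,vals+1,INSERT_VALUES);}
    else if (i == N-1) {MatSetValues(*A,1,&i,2,cols,vals,INSERT_VALUES);}
    else               {MatSetValues(*A,1,&i,3,cols,vals,INSERT_VALUES);}
  }
  MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);
  return(0);
}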
4577: /*@C
4578: MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4580: Not collective
4582: Input Parameter:
4583: . A - The MPIAIJ matrix
4585: Output Parameters:
4586: + Ad - The local diagonal block as a SeqAIJ matrix
4587: . Ao - The local off-diagonal block as a SeqAIJ matrix
4588: - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4590: Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4591:    in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4592: the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4593: local column numbers to global column numbers in the original matrix.
4595: Level: intermediate
4597: .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4598: @*/
4599: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4600: {
4601: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4602: PetscBool flg;
4606: PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);
4607: if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4608: if (Ad) *Ad = a->A;
4609: if (Ao) *Ao = a->B;
4610: if (colmap) *colmap = a->garray;
4611: return(0);
4612: }
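
/*
   Illustrative usage sketch (added for this listing; not part of the original PETSc source):
   looking at the local blocks of an assembled MPIAIJ matrix and translating the compressed
   off-diagonal column numbers back to global column numbers through colmap.  The helper name
   InspectLocalBlocks_Sketch is an assumption made only for this example; error checking is
   omitted to match the stripped style of this listing.
*/
static PetscErrorCode InspectLocalBlocks_Sketch(Mat A)
{
  Mat            Ad,Ao;
  const PetscInt *colmap;
  PetscInt       i,nco,nrl,ncl;

  MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);     /* Ad, Ao and colmap are borrowed; do not destroy or free them */
  MatGetSize(Ad,&nrl,&ncl);                  /* local diagonal block dimensions */
  MatGetSize(Ao,NULL,&nco);                  /* number of nonzero off-diagonal columns on this process */
  PetscPrintf(PETSC_COMM_SELF,"local diagonal block is %D x %D\n",nrl,ncl);
  for (i=0; i<nco; i++) {
    PetscPrintf(PETSC_COMM_SELF,"local off-diagonal column %D is global column %D\n",i,colmap[i]);
  }
  return(0);
}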
4614: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4615: {
4617: PetscInt m,N,i,rstart,nnz,Ii;
4618: PetscInt *indx;
4619: PetscScalar *values;
4622: MatGetSize(inmat,&m,&N);
4623: if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4624: PetscInt *dnz,*onz,sum,bs,cbs;
4626: if (n == PETSC_DECIDE) {
4627: PetscSplitOwnership(comm,&n,&N);
4628: }
4629: /* Check sum(n) = N */
4630: MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
4631: if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4633: MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);
4634: rstart -= m;
4636: MatPreallocateInitialize(comm,m,n,dnz,onz);
4637: for (i=0; i<m; i++) {
4638: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);
4639: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
4640: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);
4641: }
4643: MatCreate(comm,outmat);
4644: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4645: MatGetBlockSizes(inmat,&bs,&cbs);
4646: MatSetBlockSizes(*outmat,bs,cbs);
4647: MatSetType(*outmat,MATAIJ);
4648: MatSeqAIJSetPreallocation(*outmat,0,dnz);
4649: MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
4650: MatPreallocateFinalize(dnz,onz);
4651: }
4653: /* numeric phase */
4654: MatGetOwnershipRange(*outmat,&rstart,NULL);
4655: for (i=0; i<m; i++) {
4656: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4657: Ii = i + rstart;
4658: MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
4659: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4660: }
4661: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
4662: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
4663: return(0);
4664: }
4666: PetscErrorCode MatFileSplit(Mat A,char *outfile)
4667: {
4668: PetscErrorCode ierr;
4669: PetscMPIInt rank;
4670: PetscInt m,N,i,rstart,nnz;
4671: size_t len;
4672: const PetscInt *indx;
4673: PetscViewer out;
4674: char *name;
4675: Mat B;
4676: const PetscScalar *values;
4679: MatGetLocalSize(A,&m,NULL);
4680: MatGetSize(A,NULL,&N);
4681: /* Should this be the type of the diagonal block of A? */
4682: MatCreate(PETSC_COMM_SELF,&B);
4683: MatSetSizes(B,m,N,m,N);
4684: MatSetBlockSizesFromMats(B,A,A);
4685: MatSetType(B,MATSEQAIJ);
4686: MatSeqAIJSetPreallocation(B,0,NULL);
4687: MatGetOwnershipRange(A,&rstart,NULL);
4688: for (i=0; i<m; i++) {
4689: MatGetRow(A,i+rstart,&nnz,&indx,&values);
4690: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
4691: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
4692: }
4693: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
4694: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
4696: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
4697: PetscStrlen(outfile,&len);
4698: PetscMalloc1(len+6,&name);
4699: PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);
4700: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);
4701: PetscFree(name);
4702: MatView(B,out);
4703: PetscViewerDestroy(&out);
4704: MatDestroy(&B);
4705: return(0);
4706: }
4708: static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4709: {
4710: PetscErrorCode ierr;
4711: Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4714: if (!merge) return(0);
4715: PetscFree(merge->id_r);
4716: PetscFree(merge->len_s);
4717: PetscFree(merge->len_r);
4718: PetscFree(merge->bi);
4719: PetscFree(merge->bj);
4720: PetscFree(merge->buf_ri[0]);
4721: PetscFree(merge->buf_ri);
4722: PetscFree(merge->buf_rj[0]);
4723: PetscFree(merge->buf_rj);
4724: PetscFree(merge->coi);
4725: PetscFree(merge->coj);
4726: PetscFree(merge->owners_co);
4727: PetscLayoutDestroy(&merge->rowmap);
4728: PetscFree(merge);
4729: return(0);
4730: }
4732: #include <../src/mat/utils/freespace.h>
4733: #include <petscbt.h>
4735: PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4736: {
4737: PetscErrorCode ierr;
4738: MPI_Comm comm;
4739: Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data;
4740: PetscMPIInt size,rank,taga,*len_s;
4741: PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4742: PetscInt proc,m;
4743: PetscInt **buf_ri,**buf_rj;
4744: PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4745: PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
4746: MPI_Request *s_waits,*r_waits;
4747: MPI_Status *status;
4748: MatScalar *aa=a->a;
4749: MatScalar **abuf_r,*ba_i;
4750: Mat_Merge_SeqsToMPI *merge;
4751: PetscContainer container;
4754: PetscObjectGetComm((PetscObject)mpimat,&comm);
4755: PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);
4757: MPI_Comm_size(comm,&size);
4758: MPI_Comm_rank(comm,&rank);
4760: PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);
4761: if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4762: PetscContainerGetPointer(container,(void**)&merge);
4764: bi = merge->bi;
4765: bj = merge->bj;
4766: buf_ri = merge->buf_ri;
4767: buf_rj = merge->buf_rj;
4769: PetscMalloc1(size,&status);
4770: owners = merge->rowmap->range;
4771: len_s = merge->len_s;
4773: /* send and recv matrix values */
4774: /*-----------------------------*/
4775: PetscObjectGetNewTag((PetscObject)mpimat,&taga);
4776: PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);
4778: PetscMalloc1(merge->nsend+1,&s_waits);
4779: for (proc=0,k=0; proc<size; proc++) {
4780: if (!len_s[proc]) continue;
4781: i = owners[proc];
4782: MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
4783: k++;
4784: }
4786: if (merge->nrecv) {MPI_Waitall(merge->nrecv,r_waits,status);}
4787: if (merge->nsend) {MPI_Waitall(merge->nsend,s_waits,status);}
4788: PetscFree(status);
4790: PetscFree(s_waits);
4791: PetscFree(r_waits);
4793: /* insert mat values of mpimat */
4794: /*----------------------------*/
4795: PetscMalloc1(N,&ba_i);
4796: PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);
4798: for (k=0; k<merge->nrecv; k++) {
4799: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4800: nrows = *(buf_ri_k[k]);
4801: nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */
4802:     nextai[k]  = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4803: }
4805: /* set values of ba */
4806: m = merge->rowmap->n;
4807: for (i=0; i<m; i++) {
4808: arow = owners[rank] + i;
4809: bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
4810: bnzi = bi[i+1] - bi[i];
4811: PetscArrayzero(ba_i,bnzi);
4813: /* add local non-zero vals of this proc's seqmat into ba */
4814: anzi = ai[arow+1] - ai[arow];
4815: aj = a->j + ai[arow];
4816: aa = a->a + ai[arow];
4817: nextaj = 0;
4818: for (j=0; nextaj<anzi; j++) {
4819: if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4820: ba_i[j] += aa[nextaj++];
4821: }
4822: }
4824: /* add received vals into ba */
4825: for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4826: /* i-th row */
4827: if (i == *nextrow[k]) {
4828: anzi = *(nextai[k]+1) - *nextai[k];
4829: aj = buf_rj[k] + *(nextai[k]);
4830: aa = abuf_r[k] + *(nextai[k]);
4831: nextaj = 0;
4832: for (j=0; nextaj<anzi; j++) {
4833: if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4834: ba_i[j] += aa[nextaj++];
4835: }
4836: }
4837: nextrow[k]++; nextai[k]++;
4838: }
4839: }
4840: MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
4841: }
4842: MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
4843: MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);
4845: PetscFree(abuf_r[0]);
4846: PetscFree(abuf_r);
4847: PetscFree(ba_i);
4848: PetscFree3(buf_ri_k,nextrow,nextai);
4849: PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);
4850: return(0);
4851: }
4853: PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4854: {
4855: PetscErrorCode ierr;
4856: Mat B_mpi;
4857: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4858: PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4859: PetscInt **buf_rj,**buf_ri,**buf_ri_k;
4860: PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4861: PetscInt len,proc,*dnz,*onz,bs,cbs;
4862: PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4863: PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4864: MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
4865: MPI_Status *status;
4866: PetscFreeSpaceList free_space=NULL,current_space=NULL;
4867: PetscBT lnkbt;
4868: Mat_Merge_SeqsToMPI *merge;
4869: PetscContainer container;
4872: PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);
4874: /* make sure it is a PETSc comm */
4875: PetscCommDuplicate(comm,&comm,NULL);
4876: MPI_Comm_size(comm,&size);
4877: MPI_Comm_rank(comm,&rank);
4879: PetscNew(&merge);
4880: PetscMalloc1(size,&status);
4882: /* determine row ownership */
4883: /*---------------------------------------------------------*/
4884: PetscLayoutCreate(comm,&merge->rowmap);
4885: PetscLayoutSetLocalSize(merge->rowmap,m);
4886: PetscLayoutSetSize(merge->rowmap,M);
4887: PetscLayoutSetBlockSize(merge->rowmap,1);
4888: PetscLayoutSetUp(merge->rowmap);
4889: PetscMalloc1(size,&len_si);
4890: PetscMalloc1(size,&merge->len_s);
4892: m = merge->rowmap->n;
4893: owners = merge->rowmap->range;
4895: /* determine the number of messages to send, their lengths */
4896: /*---------------------------------------------------------*/
4897: len_s = merge->len_s;
4899: len = 0; /* length of buf_si[] */
4900: merge->nsend = 0;
4901: for (proc=0; proc<size; proc++) {
4902: len_si[proc] = 0;
4903: if (proc == rank) {
4904: len_s[proc] = 0;
4905: } else {
4906: len_si[proc] = owners[proc+1] - owners[proc] + 1;
4907:       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4908: }
4909: if (len_s[proc]) {
4910: merge->nsend++;
4911: nrows = 0;
4912: for (i=owners[proc]; i<owners[proc+1]; i++) {
4913: if (ai[i+1] > ai[i]) nrows++;
4914: }
4915: len_si[proc] = 2*(nrows+1);
4916: len += len_si[proc];
4917: }
4918: }
4920: /* determine the number and length of messages to receive for ij-structure */
4921: /*-------------------------------------------------------------------------*/
4922: PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);
4923: PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);
4925: /* post the Irecv of j-structure */
4926: /*-------------------------------*/
4927: PetscCommGetNewTag(comm,&tagj);
4928: PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);
4930: /* post the Isend of j-structure */
4931: /*--------------------------------*/
4932: PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);
4934: for (proc=0, k=0; proc<size; proc++) {
4935: if (!len_s[proc]) continue;
4936: i = owners[proc];
4937: MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
4938: k++;
4939: }
4941: /* receives and sends of j-structure are complete */
4942: /*------------------------------------------------*/
4943: if (merge->nrecv) {MPI_Waitall(merge->nrecv,rj_waits,status);}
4944: if (merge->nsend) {MPI_Waitall(merge->nsend,sj_waits,status);}
4946: /* send and recv i-structure */
4947: /*---------------------------*/
4948: PetscCommGetNewTag(comm,&tagi);
4949: PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);
4951: PetscMalloc1(len+1,&buf_s);
4952: buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4953: for (proc=0,k=0; proc<size; proc++) {
4954: if (!len_s[proc]) continue;
4955: /* form outgoing message for i-structure:
4956: buf_si[0]: nrows to be sent
4957: [1:nrows]: row index (global)
4958: [nrows+1:2*nrows+1]: i-structure index
4959: */
4960: /*-------------------------------------------*/
4961: nrows = len_si[proc]/2 - 1;
4962: buf_si_i = buf_si + nrows+1;
4963: buf_si[0] = nrows;
4964: buf_si_i[0] = 0;
4965: nrows = 0;
4966: for (i=owners[proc]; i<owners[proc+1]; i++) {
4967: anzi = ai[i+1] - ai[i];
4968: if (anzi) {
4969: buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4970: buf_si[nrows+1] = i-owners[proc]; /* local row index */
4971: nrows++;
4972: }
4973: }
4974: MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
4975: k++;
4976: buf_si += len_si[proc];
4977: }
4979: if (merge->nrecv) {MPI_Waitall(merge->nrecv,ri_waits,status);}
4980: if (merge->nsend) {MPI_Waitall(merge->nsend,si_waits,status);}
4982: PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);
4983: for (i=0; i<merge->nrecv; i++) {
4984: PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);
4985: }
4987: PetscFree(len_si);
4988: PetscFree(len_ri);
4989: PetscFree(rj_waits);
4990: PetscFree2(si_waits,sj_waits);
4991: PetscFree(ri_waits);
4992: PetscFree(buf_s);
4993: PetscFree(status);
4995: /* compute a local seq matrix in each processor */
4996: /*----------------------------------------------*/
4997: /* allocate bi array and free space for accumulating nonzero column info */
4998: PetscMalloc1(m+1,&bi);
4999: bi[0] = 0;
5001: /* create and initialize a linked list */
5002: nlnk = N+1;
5003: PetscLLCreate(N,N,nlnk,lnk,lnkbt);
5005: /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5006: len = ai[owners[rank+1]] - ai[owners[rank]];
5007: PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);
5009: current_space = free_space;
5011: /* determine symbolic info for each local row */
5012: PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);
5014: for (k=0; k<merge->nrecv; k++) {
5015: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5016: nrows = *buf_ri_k[k];
5017: nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
5018:     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
5019: }
5021: MatPreallocateInitialize(comm,m,n,dnz,onz);
5022: len = 0;
5023: for (i=0; i<m; i++) {
5024: bnzi = 0;
5025: /* add local non-zero cols of this proc's seqmat into lnk */
5026: arow = owners[rank] + i;
5027: anzi = ai[arow+1] - ai[arow];
5028: aj = a->j + ai[arow];
5029: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
5030: bnzi += nlnk;
5031: /* add received col data into lnk */
5032: for (k=0; k<merge->nrecv; k++) { /* k-th received message */
5033: if (i == *nextrow[k]) { /* i-th row */
5034: anzi = *(nextai[k]+1) - *nextai[k];
5035: aj = buf_rj[k] + *nextai[k];
5036: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
5037: bnzi += nlnk;
5038: nextrow[k]++; nextai[k]++;
5039: }
5040: }
5041: if (len < bnzi) len = bnzi; /* =max(bnzi) */
5043: /* if free space is not available, make more free space */
5044: if (current_space->local_remaining<bnzi) {
5045: PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);
5046: nspacedouble++;
5047: }
5048: /* copy data into free space, then initialize lnk */
5049: PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
5050: MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);
5052: current_space->array += bnzi;
5053: current_space->local_used += bnzi;
5054: current_space->local_remaining -= bnzi;
5056: bi[i+1] = bi[i] + bnzi;
5057: }
5059: PetscFree3(buf_ri_k,nextrow,nextai);
5061: PetscMalloc1(bi[m]+1,&bj);
5062: PetscFreeSpaceContiguous(&free_space,bj);
5063: PetscLLDestroy(lnk,lnkbt);
5065: /* create symbolic parallel matrix B_mpi */
5066: /*---------------------------------------*/
5067: MatGetBlockSizes(seqmat,&bs,&cbs);
5068: MatCreate(comm,&B_mpi);
5069: if (n==PETSC_DECIDE) {
5070: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);
5071: } else {
5072: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
5073: }
5074: MatSetBlockSizes(B_mpi,bs,cbs);
5075: MatSetType(B_mpi,MATMPIAIJ);
5076: MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
5077: MatPreallocateFinalize(dnz,onz);
5078: MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
5080: /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5081: B_mpi->assembled = PETSC_FALSE;
5082: merge->bi = bi;
5083: merge->bj = bj;
5084: merge->buf_ri = buf_ri;
5085: merge->buf_rj = buf_rj;
5086: merge->coi = NULL;
5087: merge->coj = NULL;
5088: merge->owners_co = NULL;
5090: PetscCommDestroy(&comm);
5092: /* attach the supporting struct to B_mpi for reuse */
5093: PetscContainerCreate(PETSC_COMM_SELF,&container);
5094: PetscContainerSetPointer(container,merge);
5095: PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);
5096: PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
5097: PetscContainerDestroy(&container);
5098: *mpimat = B_mpi;
5100: PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);
5101: return(0);
5102: }
5104: /*@C
5105: MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5106: matrices from each processor
5108: Collective
5110: Input Parameters:
5111: +  comm - the communicator the parallel matrix will live on
5112: .  seqmat - the input sequential matrix on each process
5113: . m - number of local rows (or PETSC_DECIDE)
5114: . n - number of local columns (or PETSC_DECIDE)
5115: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5117: Output Parameter:
5118: . mpimat - the parallel matrix generated
5120: Level: advanced
5122: Notes:
5123:     The dimensions of the sequential matrix on each process MUST be the same.
5124:     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5125:     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5126: @*/
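/*
   Example usage (a minimal sketch, with error checking omitted; 'seqmat' is assumed to be an
   assembled MATSEQAIJ matrix with identical dimensions on every rank of PETSC_COMM_WORLD):

     Mat C;
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     ...                                                                              (use C)
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
     ...                                                   (reuse after seqmat's values change)
     MatDestroy(&C);
*/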
5127: PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5128: {
5130: PetscMPIInt size;
5133: MPI_Comm_size(comm,&size);
5134: if (size == 1) {
5135: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
5136: if (scall == MAT_INITIAL_MATRIX) {
5137: MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);
5138: } else {
5139: MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);
5140: }
5141: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
5142: return(0);
5143: }
5144: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
5145: if (scall == MAT_INITIAL_MATRIX) {
5146: MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);
5147: }
5148: MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);
5149: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
5150: return(0);
5151: }
5153: /*@
5154:    MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5155:    mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5156:    with MatGetSize().
5158: Not Collective
5160: Input Parameters:
5161: + A - the matrix
5162: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5164: Output Parameter:
5165: . A_loc - the local sequential matrix generated
5167: Level: developer
5169: Notes:
5170: When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5171: If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5172: This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5173: modify the values of the returned A_loc.
5175: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5177: @*/
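/*
   Example usage (a minimal sketch, with error checking omitted; 'A' is assumed to be an
   assembled MATMPIAIJ matrix):

     Mat Aloc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
     ...                                               (use Aloc: local rows, global column space)
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);
     ...                                               (values refreshed after A changes)
     MatDestroy(&Aloc);
*/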
5178: PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5179: {
5181: Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
5182: Mat_SeqAIJ *mat,*a,*b;
5183: PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5184: MatScalar *aa,*ba,*cam;
5185: PetscScalar *ca;
5186: PetscMPIInt size;
5187: PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5188: PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
5189: PetscBool match;
5192: PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);
5193: if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5194: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
5195: if (size == 1) {
5196: if (scall == MAT_INITIAL_MATRIX) {
5197: PetscObjectReference((PetscObject)mpimat->A);
5198: *A_loc = mpimat->A;
5199: } else if (scall == MAT_REUSE_MATRIX) {
5200: MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);
5201: }
5202: return(0);
5203: }
5205: PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);
5206: a = (Mat_SeqAIJ*)(mpimat->A)->data;
5207: b = (Mat_SeqAIJ*)(mpimat->B)->data;
5208: ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5209: aa = a->a; ba = b->a;
5210: if (scall == MAT_INITIAL_MATRIX) {
5211: PetscMalloc1(1+am,&ci);
5212: ci[0] = 0;
5213: for (i=0; i<am; i++) {
5214: ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5215: }
5216: PetscMalloc1(1+ci[am],&cj);
5217: PetscMalloc1(1+ci[am],&ca);
5218: k = 0;
5219: for (i=0; i<am; i++) {
5220: ncols_o = bi[i+1] - bi[i];
5221: ncols_d = ai[i+1] - ai[i];
5222: /* off-diagonal portion of A */
5223: for (jo=0; jo<ncols_o; jo++) {
5224: col = cmap[*bj];
5225: if (col >= cstart) break;
5226: cj[k] = col; bj++;
5227: ca[k++] = *ba++;
5228: }
5229: /* diagonal portion of A */
5230: for (j=0; j<ncols_d; j++) {
5231: cj[k] = cstart + *aj++;
5232: ca[k++] = *aa++;
5233: }
5234: /* off-diagonal portion of A */
5235: for (j=jo; j<ncols_o; j++) {
5236: cj[k] = cmap[*bj++];
5237: ca[k++] = *ba++;
5238: }
5239: }
5240: /* put together the new matrix */
5241: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);
5242: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5243: /* Since these are PETSc arrays, change flags to free them as necessary. */
5244: mat = (Mat_SeqAIJ*)(*A_loc)->data;
5245: mat->free_a = PETSC_TRUE;
5246: mat->free_ij = PETSC_TRUE;
5247: mat->nonew = 0;
5248: } else if (scall == MAT_REUSE_MATRIX) {
5249: mat=(Mat_SeqAIJ*)(*A_loc)->data;
5250: ci = mat->i; cj = mat->j; cam = mat->a;
5251: for (i=0; i<am; i++) {
5252: /* off-diagonal portion of A */
5253: ncols_o = bi[i+1] - bi[i];
5254: for (jo=0; jo<ncols_o; jo++) {
5255: col = cmap[*bj];
5256: if (col >= cstart) break;
5257: *cam++ = *ba++; bj++;
5258: }
5259: /* diagonal portion of A */
5260: ncols_d = ai[i+1] - ai[i];
5261: for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5262: /* off-diagonal portion of A */
5263: for (j=jo; j<ncols_o; j++) {
5264: *cam++ = *ba++; bj++;
5265: }
5266: }
5267: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5268: PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);
5269: return(0);
5270: }
5272: /*@C
5273:    MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5275: Not Collective
5277: Input Parameters:
5278: + A - the matrix
5279: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5280: - row, col - index sets of rows and columns to extract (or NULL)
5282: Output Parameter:
5283: . A_loc - the local sequential matrix generated
5285: Level: developer
5287: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5289: @*/
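/*
   Example usage (a minimal sketch, with error checking omitted; NULL is passed for the row and
   column index sets so that all local rows and all locally nonzero columns are taken):

     Mat Aloc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);
     ...
     MatDestroy(&Aloc);
*/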
5290: PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5291: {
5292: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5294: PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5295: IS isrowa,iscola;
5296: Mat *aloc;
5297: PetscBool match;
5300: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);
5301: if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5302: PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);
5303: if (!row) {
5304: start = A->rmap->rstart; end = A->rmap->rend;
5305: ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
5306: } else {
5307: isrowa = *row;
5308: }
5309: if (!col) {
5310: start = A->cmap->rstart;
5311: cmap = a->garray;
5312: nzA = a->A->cmap->n;
5313: nzB = a->B->cmap->n;
5314: PetscMalloc1(nzA+nzB, &idx);
5315: ncols = 0;
5316: for (i=0; i<nzB; i++) {
5317: if (cmap[i] < start) idx[ncols++] = cmap[i];
5318: else break;
5319: }
5320: imark = i;
5321: for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5322: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5323: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);
5324: } else {
5325: iscola = *col;
5326: }
5327: if (scall != MAT_INITIAL_MATRIX) {
5328: PetscMalloc1(1,&aloc);
5329: aloc[0] = *A_loc;
5330: }
5331: MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);
5332: if (!col) { /* attach global id of condensed columns */
5333: PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);
5334: }
5335: *A_loc = aloc[0];
5336: PetscFree(aloc);
5337: if (!row) {
5338: ISDestroy(&isrowa);
5339: }
5340: if (!col) {
5341: ISDestroy(&iscola);
5342: }
5343: PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);
5344: return(0);
5345: }
5347: /*
5348:  * Create a sequential AIJ matrix based on row indices: all columns of a row are extracted once the row is matched.
5349:  * Rows may be local or remote. The routine is designed to be memory-scalable so that nothing depends
5350:  * on a global size.
5351: * */
5352: PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5353: {
5354: Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data;
5355: Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5356: PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5357: PetscMPIInt owner;
5358: PetscSFNode *iremote,*oiremote;
5359: const PetscInt *lrowindices;
5360: PetscErrorCode ierr;
5361: PetscSF sf,osf;
5362: PetscInt pcstart,*roffsets,*loffsets,*pnnz,j;
5363: PetscInt ontotalcols,dntotalcols,ntotalcols,nout;
5364: MPI_Comm comm;
5365: ISLocalToGlobalMapping mapping;
5368: PetscObjectGetComm((PetscObject)P,&comm);
5369: /* plocalsize is the number of roots
5370: * nrows is the number of leaves
5371: * */
5372: MatGetLocalSize(P,&plocalsize,NULL);
5373: ISGetLocalSize(rows,&nrows);
5374: PetscCalloc1(nrows,&iremote);
5375: ISGetIndices(rows,&lrowindices);
5376: for (i=0;i<nrows;i++) {
5377: /* Find a remote index and an owner for a row
5378: * The row could be local or remote
5379: * */
5380: owner = 0;
5381: lidx = 0;
5382: PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);
5383: iremote[i].index = lidx;
5384: iremote[i].rank = owner;
5385: }
5386: /* Create SF to communicate how many nonzero columns for each row */
5387: PetscSFCreate(comm,&sf);
5388:   /* SF will figure out the number of nonzero columns for each row, and their
5389: * offsets
5390: * */
5391: PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
5392: PetscSFSetFromOptions(sf);
5393: PetscSFSetUp(sf);
5395: PetscCalloc1(2*(plocalsize+1),&roffsets);
5396: PetscCalloc1(2*plocalsize,&nrcols);
5397: PetscCalloc1(nrows,&pnnz);
5398: roffsets[0] = 0;
5399: roffsets[1] = 0;
5400: for (i=0;i<plocalsize;i++) {
5401: /* diag */
5402: nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5403: /* off diag */
5404: nrcols[i*2+1] = po->i[i+1] - po->i[i];
5405:     /* compute offsets so that we know the relative location of each row's entries */
5406: roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5407: roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5408: }
5409: PetscCalloc1(2*nrows,&nlcols);
5410: PetscCalloc1(2*nrows,&loffsets);
5411: /* 'r' means root, and 'l' means leaf */
5412: PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);
5413: PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);
5414: PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);
5415: PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);
5416: PetscSFDestroy(&sf);
5417: PetscFree(roffsets);
5418: PetscFree(nrcols);
5419: dntotalcols = 0;
5420: ontotalcols = 0;
5421: ncol = 0;
5422: for (i=0;i<nrows;i++) {
5423: pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5424: ncol = PetscMax(pnnz[i],ncol);
5425: /* diag */
5426: dntotalcols += nlcols[i*2+0];
5427: /* off diag */
5428: ontotalcols += nlcols[i*2+1];
5429: }
5430:   /* We do not need to figure out the right number of columns
5431: * since all the calculations will be done by going through the raw data
5432: * */
5433: MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);
5434: MatSetUp(*P_oth);
5435: PetscFree(pnnz);
5436: p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5437: /* diag */
5438: PetscCalloc1(dntotalcols,&iremote);
5439: /* off diag */
5440: PetscCalloc1(ontotalcols,&oiremote);
5441: /* diag */
5442: PetscCalloc1(dntotalcols,&ilocal);
5443: /* off diag */
5444: PetscCalloc1(ontotalcols,&oilocal);
5445: dntotalcols = 0;
5446: ontotalcols = 0;
5447: ntotalcols = 0;
5448: for (i=0;i<nrows;i++) {
5449: owner = 0;
5450: PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);
5451: /* Set iremote for diag matrix */
5452: for (j=0;j<nlcols[i*2+0];j++) {
5453: iremote[dntotalcols].index = loffsets[i*2+0] + j;
5454: iremote[dntotalcols].rank = owner;
5455:       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5456: ilocal[dntotalcols++] = ntotalcols++;
5457: }
5458: /* off diag */
5459: for (j=0;j<nlcols[i*2+1];j++) {
5460: oiremote[ontotalcols].index = loffsets[i*2+1] + j;
5461: oiremote[ontotalcols].rank = owner;
5462: oilocal[ontotalcols++] = ntotalcols++;
5463: }
5464: }
5465: ISRestoreIndices(rows,&lrowindices);
5466: PetscFree(loffsets);
5467: PetscFree(nlcols);
5468: PetscSFCreate(comm,&sf);
5469:   /* P serves as roots and P_oth as leaves
5470: * Diag matrix
5471: * */
5472: PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
5473: PetscSFSetFromOptions(sf);
5474: PetscSFSetUp(sf);
5476: PetscSFCreate(comm,&osf);
5477: /* Off diag */
5478: PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);
5479: PetscSFSetFromOptions(osf);
5480: PetscSFSetUp(osf);
5481: /* We operate on the matrix internal data for saving memory */
5482: PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);
5483: PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);
5484: MatGetOwnershipRangeColumn(P,&pcstart,NULL);
5485: /* Convert to global indices for diag matrix */
5486: for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5487: PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);
5488:   /* We want P_oth to store global indices */
5489: ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);
5490: /* Use memory scalable approach */
5491: ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);
5492: ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);
5493: PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);
5494: PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);
5495: /* Convert back to local indices */
5496: for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5497: PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);
5498: nout = 0;
5499: ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);
5500:   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D\n",po->i[plocalsize],nout);
5501: ISLocalToGlobalMappingDestroy(&mapping);
5502: /* Exchange values */
5503: PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);
5504: PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);
5505: /* Stop PETSc from shrinking memory */
5506: for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5507: MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);
5508: MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);
5509: /* Attach PetscSF objects to P_oth so that we can reuse it later */
5510: PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);
5511: PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);
5512: PetscSFDestroy(&sf);
5513: PetscSFDestroy(&osf);
5514: return(0);
5515: }
5517: /*
5518:  * Creates a SeqAIJ matrix by taking the rows of P that correspond to nonzero columns of local A.
5519:  * This supports MPIAIJ and MAIJ matrices.
5520: * */
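/*
   Example usage (a minimal sketch, with error checking omitted; A and P are assumed to be
   assembled MATMPIAIJ matrices and dof = 1):

     Mat P_oth = NULL;
     MatGetBrowsOfAcols_MPIXAIJ(A,P,1,MAT_INITIAL_MATRIX,&P_oth);
     ...                                                           (use P_oth)
     MatGetBrowsOfAcols_MPIXAIJ(A,P,1,MAT_REUSE_MATRIX,&P_oth);
     ...                                                           (values updated in place)
     MatDestroy(&P_oth);
*/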
5521: PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5522: {
5523: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5524: Mat_SeqAIJ *p_oth;
5525: Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5526: IS rows,map;
5527: PetscHMapI hamp;
5528: PetscInt i,htsize,*rowindices,off,*mapping,key,count;
5529: MPI_Comm comm;
5530: PetscSF sf,osf;
5531: PetscBool has;
5532: PetscErrorCode ierr;
5535: PetscObjectGetComm((PetscObject)A,&comm);
5536: PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);
5537: /* If it is the first time, create an index set of off-diag nonzero columns of A,
5538: * and then create a submatrix (that often is an overlapping matrix)
5539: * */
5540: if (reuse == MAT_INITIAL_MATRIX) {
5541: /* Use a hash table to figure out unique keys */
5542: PetscHMapICreate(&hamp);
5543: PetscHMapIResize(hamp,a->B->cmap->n);
5544: PetscCalloc1(a->B->cmap->n,&mapping);
5545: count = 0;
5546:     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5547: for (i=0;i<a->B->cmap->n;i++) {
5548: key = a->garray[i]/dof;
5549: PetscHMapIHas(hamp,key,&has);
5550: if (!has) {
5551: mapping[i] = count;
5552: PetscHMapISet(hamp,key,count++);
5553: } else {
5554:         /* The current 'i' has the same key as the previous one */
5555: mapping[i] = count-1;
5556: }
5557: }
5558: ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);
5559: PetscHMapIGetSize(hamp,&htsize);
5560: if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);
5561: PetscCalloc1(htsize,&rowindices);
5562: off = 0;
5563: PetscHMapIGetKeys(hamp,&off,rowindices);
5564: PetscHMapIDestroy(&hamp);
5565: PetscSortInt(htsize,rowindices);
5566: ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);
5567:     /* In case the matrix was already created but the user wants to recreate it */
5568: MatDestroy(P_oth);
5569: MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);
5570: PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);
5571: ISDestroy(&map);
5572: ISDestroy(&rows);
5573: } else if (reuse == MAT_REUSE_MATRIX) {
5574:     /* If the matrix was already created, we simply update its values using the SF objects
5575:      * that were attached to the matrix earlier.
5576: * */
5577: PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);
5578: PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);
5579: if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5580: p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5581: /* Update values in place */
5582: PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);
5583: PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);
5584: PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);
5585: PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);
5586: } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5587: PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);
5588: return(0);
5589: }
5591: /*@C
5592:    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to nonzero columns of local A
5594: Collective on Mat
5596: Input Parameters:
5597: + A,B - the matrices in mpiaij format
5598: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5599: - rowb, colb - index sets of rows and columns of B to extract (or NULL)
5601: Output Parameter:
5602: + rowb, colb - index sets of rows and columns of B to extract
5603: - B_seq - the sequential matrix generated
5605: Level: developer
5607: @*/
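/*
   Example usage (a minimal sketch, with error checking omitted; the row/column index sets are
   created on the first call and must be supplied again when reusing the matrix):

     IS  rowb = NULL,colb = NULL;
     Mat Bseq = NULL;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);
     ...
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);
     MatDestroy(&Bseq);
     ISDestroy(&rowb);
     ISDestroy(&colb);
*/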
5608: PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5609: {
5610: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5612: PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5613: IS isrowb,iscolb;
5614: Mat *bseq=NULL;
5617: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5618: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5619: }
5620: PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);
5622: if (scall == MAT_INITIAL_MATRIX) {
5623: start = A->cmap->rstart;
5624: cmap = a->garray;
5625: nzA = a->A->cmap->n;
5626: nzB = a->B->cmap->n;
5627: PetscMalloc1(nzA+nzB, &idx);
5628: ncols = 0;
5629: for (i=0; i<nzB; i++) { /* row < local row index */
5630: if (cmap[i] < start) idx[ncols++] = cmap[i];
5631: else break;
5632: }
5633: imark = i;
5634: for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
5635: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5636: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);
5637: ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);
5638: } else {
5639: if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5640: isrowb = *rowb; iscolb = *colb;
5641: PetscMalloc1(1,&bseq);
5642: bseq[0] = *B_seq;
5643: }
5644: MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);
5645: *B_seq = bseq[0];
5646: PetscFree(bseq);
5647: if (!rowb) {
5648: ISDestroy(&isrowb);
5649: } else {
5650: *rowb = isrowb;
5651: }
5652: if (!colb) {
5653: ISDestroy(&iscolb);
5654: } else {
5655: *colb = iscolb;
5656: }
5657: PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);
5658: return(0);
5659: }
5661: /*
5662:     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to nonzero columns
5663: of the OFF-DIAGONAL portion of local A
5665: Collective on Mat
5667: Input Parameters:
5668: + A,B - the matrices in mpiaij format
5669: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5671: Output Parameter:
5672: + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5673: . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5674: . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5675: - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5677: Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5678:     for this matrix. This is not desirable.
5680: Level: developer
5682: */
5683: PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5684: {
5685: PetscErrorCode ierr;
5686: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5687: Mat_SeqAIJ *b_oth;
5688: VecScatter ctx;
5689: MPI_Comm comm;
5690: const PetscMPIInt *rprocs,*sprocs;
5691: const PetscInt *srow,*rstarts,*sstarts;
5692: PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5693: PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5694: PetscScalar *b_otha,*bufa,*bufA,*vals = NULL;
5695: MPI_Request *rwaits = NULL,*swaits = NULL;
5696: MPI_Status rstatus;
5697: PetscMPIInt size,tag,rank,nsends_mpi,nrecvs_mpi;
5698: PETSC_UNUSED PetscMPIInt jj;
5701: PetscObjectGetComm((PetscObject)A,&comm);
5702: MPI_Comm_size(comm,&size);
5704: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5705:     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5706: }
5707: PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);
5708: MPI_Comm_rank(comm,&rank);
5710: if (size == 1) {
5711: startsj_s = NULL;
5712: bufa_ptr = NULL;
5713: *B_oth = NULL;
5714: return(0);
5715: }
5717: ctx = a->Mvctx;
5718: tag = ((PetscObject)ctx)->tag;
5720: if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5721: VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);
5722: /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5723: VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);
5724: PetscMPIIntCast(nsends,&nsends_mpi);
5725: PetscMPIIntCast(nrecvs,&nrecvs_mpi);
5726: PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);
5728: if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5729: if (scall == MAT_INITIAL_MATRIX) {
5730: /* i-array */
5731: /*---------*/
5732: /* post receives */
5733: if (nrecvs) {PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);} /* rstarts can be NULL when nrecvs=0 */
5734: for (i=0; i<nrecvs; i++) {
5735: rowlen = rvalues + rstarts[i]*rbs;
5736: nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5737: MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5738: }
5740: /* pack the outgoing message */
5741: PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);
5743: sstartsj[0] = 0;
5744: rstartsj[0] = 0;
5745: len = 0; /* total length of j or a array to be sent */
5746: if (nsends) {
5747: k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5748: PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);
5749: }
5750: for (i=0; i<nsends; i++) {
5751: rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5752: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5753: for (j=0; j<nrows; j++) {
5754: row = srow[k] + B->rmap->range[rank]; /* global row idx */
5755: for (l=0; l<sbs; l++) {
5756: MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL); /* rowlength */
5758: rowlen[j*sbs+l] = ncols;
5760: len += ncols;
5761: MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);
5762: }
5763: k++;
5764: }
5765: MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);
5767: sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5768: }
5769: /* recvs and sends of i-array are completed */
5770: i = nrecvs;
5771: while (i--) {
5772: MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);
5773: }
5774: if (nsends) {MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);}
5775: PetscFree(svalues);
5777: /* allocate buffers for sending j and a arrays */
5778: PetscMalloc1(len+1,&bufj);
5779: PetscMalloc1(len+1,&bufa);
5781: /* create i-array of B_oth */
5782: PetscMalloc1(aBn+2,&b_othi);
5784: b_othi[0] = 0;
5785: len = 0; /* total length of j or a array to be received */
5786: k = 0;
5787: for (i=0; i<nrecvs; i++) {
5788: rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5789: nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5790: for (j=0; j<nrows; j++) {
5791: b_othi[k+1] = b_othi[k] + rowlen[j];
5792: PetscIntSumError(rowlen[j],len,&len);
5793: k++;
5794: }
5795: rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5796: }
5797: PetscFree(rvalues);
5799:     /* allocate space for j and a arrays of B_oth */
5800: PetscMalloc1(b_othi[aBn]+1,&b_othj);
5801: PetscMalloc1(b_othi[aBn]+1,&b_otha);
5803: /* j-array */
5804: /*---------*/
5805: /* post receives of j-array */
5806: for (i=0; i<nrecvs; i++) {
5807: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5808: MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5809: }
5811: /* pack the outgoing message j-array */
5812: if (nsends) k = sstarts[0];
5813: for (i=0; i<nsends; i++) {
5814: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5815: bufJ = bufj+sstartsj[i];
5816: for (j=0; j<nrows; j++) {
5817: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5818: for (ll=0; ll<sbs; ll++) {
5819: MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);
5820: for (l=0; l<ncols; l++) {
5821: *bufJ++ = cols[l];
5822: }
5823: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);
5824: }
5825: }
5826: MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);
5827: }
5829: /* recvs and sends of j-array are completed */
5830: i = nrecvs;
5831: while (i--) {
5832: MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);
5833: }
5834: if (nsends) {MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);}
5835: } else if (scall == MAT_REUSE_MATRIX) {
5836: sstartsj = *startsj_s;
5837: rstartsj = *startsj_r;
5838: bufa = *bufa_ptr;
5839: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5840: b_otha = b_oth->a;
5841:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5843: /* a-array */
5844: /*---------*/
5845: /* post receives of a-array */
5846: for (i=0; i<nrecvs; i++) {
5847: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5848: MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
5849: }
5851: /* pack the outgoing message a-array */
5852: if (nsends) k = sstarts[0];
5853: for (i=0; i<nsends; i++) {
5854: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5855: bufA = bufa+sstartsj[i];
5856: for (j=0; j<nrows; j++) {
5857: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5858: for (ll=0; ll<sbs; ll++) {
5859: MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);
5860: for (l=0; l<ncols; l++) {
5861: *bufA++ = vals[l];
5862: }
5863: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);
5864: }
5865: }
5866: MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
5867: }
5868: /* recvs and sends of a-array are completed */
5869: i = nrecvs;
5870: while (i--) {
5871: MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);
5872: }
5873: if (nsends) {MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);}
5874: PetscFree2(rwaits,swaits);
5876: if (scall == MAT_INITIAL_MATRIX) {
5877: /* put together the new matrix */
5878: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);
5880: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5881: /* Since these are PETSc arrays, change flags to free them as necessary. */
5882: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5883: b_oth->free_a = PETSC_TRUE;
5884: b_oth->free_ij = PETSC_TRUE;
5885: b_oth->nonew = 0;
5887: PetscFree(bufj);
5888: if (!startsj_s || !bufa_ptr) {
5889: PetscFree2(sstartsj,rstartsj);
5890: PetscFree(bufa_ptr);
5891: } else {
5892: *startsj_s = sstartsj;
5893: *startsj_r = rstartsj;
5894: *bufa_ptr = bufa;
5895: }
5896: }
5898: VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);
5899: VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);
5900: PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);
5901: return(0);
5902: }
5904: /*@C
5905: MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5907: Not Collective
5909: Input Parameters:
5910: . A - The matrix in mpiaij format
5912: Output Parameter:
5913: + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5914: . colmap - A map from global column index to local index into lvec
5915: - multScatter - A scatter from the argument of a matrix-vector product to lvec
5917: Level: developer
5919: @*/
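/*
   Example usage (a minimal sketch; NULL may be passed for any output that is not needed, and the
   returned objects are owned by the matrix, so the caller must not destroy them):

     Vec        lvec;
     VecScatter sct;
     MatGetCommunicationStructs(A,&lvec,NULL,&sct);
*/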
5920: #if defined(PETSC_USE_CTABLE)
5921: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5922: #else
5923: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5924: #endif
5925: {
5926: Mat_MPIAIJ *a;
5933: a = (Mat_MPIAIJ*) A->data;
5934: if (lvec) *lvec = a->lvec;
5935: if (colmap) *colmap = a->colmap;
5936: if (multScatter) *multScatter = a->Mvctx;
5937: return(0);
5938: }
5940: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5941: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5942: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5943: #if defined(PETSC_HAVE_MKL_SPARSE)
5944: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5945: #endif
5946: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5947: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5948: #if defined(PETSC_HAVE_ELEMENTAL)
5949: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5950: #endif
5951: #if defined(PETSC_HAVE_SCALAPACK)
5952: PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5953: #endif
5954: #if defined(PETSC_HAVE_HYPRE)
5955: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5956: #endif
5957: #if defined(PETSC_HAVE_CUDA)
5958: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5959: #endif
5960: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5961: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5962: #endif
5963: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5964: PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5965: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5967: /*
5968:     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5970: n p p
5971: [ ] [ ] [ ]
5972: m [ A ] * n [ B ] = m [ C ]
5973: [ ] [ ] [ ]
5975: */
5976: static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5977: {
5979: Mat At,Bt,Ct;
5982: MatTranspose(A,MAT_INITIAL_MATRIX,&At);
5983: MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);
5984: MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);
5985: MatDestroy(&At);
5986: MatDestroy(&Bt);
5987: MatTranspose(Ct,MAT_REUSE_MATRIX,&C);
5988: MatDestroy(&Ct);
5989: return(0);
5990: }
5992: static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5993: {
5995: PetscBool cisdense;
5998:   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D\n",A->cmap->n,B->rmap->n);
5999: MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);
6000: MatSetBlockSizesFromMats(C,A,B);
6001: PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");
6002: if (!cisdense) {
6003: MatSetType(C,((PetscObject)A)->type_name);
6004: }
6005: MatSetUp(C);
6007: C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6008: return(0);
6009: }
6011: /* ----------------------------------------------------------------*/
6012: static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6013: {
6014: Mat_Product *product = C->product;
6015: Mat A = product->A,B=product->B;
6018: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6019: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6021: C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6022: C->ops->productsymbolic = MatProductSymbolic_AB;
6023: return(0);
6024: }
6026: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6027: {
6029: Mat_Product *product = C->product;
6032: if (product->type == MATPRODUCT_AB) {
6033: MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);
6034: }
6035: return(0);
6036: }
6037: /* ----------------------------------------------------------------*/
6039: /*MC
6040: MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6042: Options Database Keys:
6043: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6045: Level: beginner
6047: Notes:
6048:     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6049:     in this case the values associated with the rows and columns one passes in are set to zero
6050:     in the matrix
6052:     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6053:     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6055: .seealso: MatCreateAIJ()
6056: M*/
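/*
   Example: creating and filling a MATMPIAIJ matrix (a minimal sketch, with error checking
   omitted; the preallocation counts and the diagonal values are chosen for illustration only):

     Mat      A;
     PetscInt rstart,rend,i;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
     MatGetOwnershipRange(A,&rstart,&rend);
     for (i=rstart; i<rend; i++) {
       MatSetValue(A,i,i,1.0,INSERT_VALUES);
     }
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/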
6058: PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6059: {
6060: Mat_MPIAIJ *b;
6062: PetscMPIInt size;
6065: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
6067: PetscNewLog(B,&b);
6068: B->data = (void*)b;
6069: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
6070: B->assembled = PETSC_FALSE;
6071: B->insertmode = NOT_SET_VALUES;
6072: b->size = size;
6074: MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);
6076: /* build cache for off array entries formed */
6077: MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);
6079: b->donotstash = PETSC_FALSE;
6080: b->colmap = NULL;
6081: b->garray = NULL;
6082: b->roworiented = PETSC_TRUE;
6084: /* stuff used for matrix vector multiply */
6085: b->lvec = NULL;
6086: b->Mvctx = NULL;
6088: /* stuff for MatGetRow() */
6089: b->rowindices = NULL;
6090: b->rowvalues = NULL;
6091: b->getrowactive = PETSC_FALSE;
6093: /* flexible pointer used in CUSPARSE classes */
6094: b->spptr = NULL;
6096: PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);
6097: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);
6098: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);
6099: PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);
6100: PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);
6101: PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);
6102: PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);
6103: PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);
6104: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);
6105: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);
6106: #if defined(PETSC_HAVE_CUDA)
6107: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);
6108: #endif
6109: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6110: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);
6111: #endif
6112: #if defined(PETSC_HAVE_MKL_SPARSE)
6113: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);
6114: #endif
6115: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);
6116: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);
6117: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);
6118: #if defined(PETSC_HAVE_ELEMENTAL)
6119: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);
6120: #endif
6121: #if defined(PETSC_HAVE_SCALAPACK)
6122: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);
6123: #endif
6124: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);
6125: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);
6126: #if defined(PETSC_HAVE_HYPRE)
6127: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);
6128: PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);
6129: #endif
6130: PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);
6131: PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);
6132: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);
6133: return(0);
6134: }
6136: /*@C
6137:    MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
6138: and "off-diagonal" part of the matrix in CSR format.
6140: Collective
6142: Input Parameters:
6143: + comm - MPI communicator
6144: . m - number of local rows (Cannot be PETSC_DECIDE)
6145: . n - This value should be the same as the local size used in creating the
6146:        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
6147:        calculated if N is given). For square matrices n is almost always m.
6148: .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6149: .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6150: . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6151: . j - column indices
6152: . a - matrix values
6153: . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6154: . oj - column indices
6155: - oa - matrix values
6157: Output Parameter:
6158: . mat - the matrix
6160: Level: advanced
6162: Notes:
6163: The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6164: must free the arrays once the matrix has been destroyed and not before.
6166: The i and j indices are 0 based
6168: See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6170: This sets local rows and cannot be used to set off-processor values.
6172: Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6173: legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6174: not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6175: the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6176: keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6177: communication if it is known that only local entries will be set.
6179: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6180: MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6181: @*/
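/*
   Example usage (a schematic sketch for the degenerate case of one local row holding a single
   diagonal entry and an empty "off-diagonal" part; the arrays must remain valid until the matrix
   is destroyed, and oj/oa are dummies that are never read since oi reports zero entries):

     PetscInt    i[]  = {0,1},  j[]  = {0};
     PetscScalar a[]  = {2.0};
     PetscInt    oi[] = {0,0},  oj[] = {0};
     PetscScalar oa[] = {0.0};
     Mat         A;
     MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);
     ...
     MatDestroy(&A);
*/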
6182: PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6183: {
6185: Mat_MPIAIJ *maij;
6188: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6189: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6190: if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6191: MatCreate(comm,mat);
6192: MatSetSizes(*mat,m,n,M,N);
6193: MatSetType(*mat,MATMPIAIJ);
6194: maij = (Mat_MPIAIJ*) (*mat)->data;
6196: (*mat)->preallocated = PETSC_TRUE;
6198: PetscLayoutSetUp((*mat)->rmap);
6199: PetscLayoutSetUp((*mat)->cmap);
6201: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);
6202: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);
6204: MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);
6205: MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);
6206: MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);
6207: MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);
6209: MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
6210: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
6211: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
6212: MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);
6213: MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
6214: return(0);
6215: }
6217: /*
6218: Special version for direct calls from Fortran
6219: */
6220: #include <petsc/private/fortranimpl.h>
6222: /* Change these macros so can be used in void function */
6223: #undef CHKERRQ
6224: #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6225: #undef SETERRQ2
6226: #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6227: #undef SETERRQ3
6228: #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6229: #undef SETERRQ
6230: #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6232: #if defined(PETSC_HAVE_FORTRAN_CAPS)
6233: #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6234: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6235: #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6236: #else
6237: #endif
6238: PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6239: {
6240: Mat mat = *mmat;
6241: PetscInt m = *mm, n = *mn;
6242: InsertMode addv = *maddv;
6243: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
6244: PetscScalar value;
6247: MatCheckPreallocated(mat,1);
6248: if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6249: else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6250: {
6251: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
6252: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6253: PetscBool roworiented = aij->roworiented;
6255: /* Some Variables required in the macro */
6256: Mat A = aij->A;
6257: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
6258: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6259: MatScalar *aa = a->a;
6260: PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6261: Mat B = aij->B;
6262: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
6263: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6264: MatScalar *ba = b->a;
6265: /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6266: * cannot use "#if defined" inside a macro. */
6267: PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6269: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6270: PetscInt nonew = a->nonew;
6271: MatScalar *ap1,*ap2;
6274: for (i=0; i<m; i++) {
6275: if (im[i] < 0) continue;
6276: if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6277: if (im[i] >= rstart && im[i] < rend) {
6278: row = im[i] - rstart;
6279: lastcol1 = -1;
6280: rp1 = aj + ai[row];
6281: ap1 = aa + ai[row];
6282: rmax1 = aimax[row];
6283: nrow1 = ailen[row];
6284: low1 = 0;
6285: high1 = nrow1;
6286: lastcol2 = -1;
6287: rp2 = bj + bi[row];
6288: ap2 = ba + bi[row];
6289: rmax2 = bimax[row];
6290: nrow2 = bilen[row];
6291: low2 = 0;
6292: high2 = nrow2;
6294: for (j=0; j<n; j++) {
6295: if (roworiented) value = v[i*n+j];
6296: else value = v[i+j*m];
6297: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6298: if (in[j] >= cstart && in[j] < cend) {
6299: col = in[j] - cstart;
6300: MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6301: #if defined(PETSC_HAVE_DEVICE)
6302: if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6303: #endif
6304: } else if (in[j] < 0) continue;
6305: else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6306: /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6307: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6308: } else {
6309: if (mat->was_assembled) {
6310: if (!aij->colmap) {
6311: MatCreateColmap_MPIAIJ_Private(mat);
6312: }
6313: #if defined(PETSC_USE_CTABLE)
6314: PetscTableFind(aij->colmap,in[j]+1,&col);
6315: col--;
6316: #else
6317: col = aij->colmap[in[j]] - 1;
6318: #endif
6319: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6320: MatDisAssemble_MPIAIJ(mat);
6321: col = in[j];
6322: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6323: B = aij->B;
6324: b = (Mat_SeqAIJ*)B->data;
6325: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6326: rp2 = bj + bi[row];
6327: ap2 = ba + bi[row];
6328: rmax2 = bimax[row];
6329: nrow2 = bilen[row];
6330: low2 = 0;
6331: high2 = nrow2;
6332: bm = aij->B->rmap->n;
6333: ba = b->a;
6334: inserted = PETSC_FALSE;
6335: }
6336: } else col = in[j];
6337: MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6338: #if defined(PETSC_HAVE_DEVICE)
6339: if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6340: #endif
6341: }
6342: }
6343: } else if (!aij->donotstash) {
6344: if (roworiented) {
6345: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
6346: } else {
6347: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
6348: }
6349: }
6350: }
6351: }
6352: PetscFunctionReturnVoid();
6353: }