Actual source code: mpiov.c
petsc-3.5.4 2015-05-23
2: /*
3: Routines to compute overlapping regions of a parallel MPI matrix
4: and to find submatrices that are shared across processors.
5: */
6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
7: #include <petscbt.h>
9: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat,PetscInt,IS*);
10: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local(Mat,PetscInt,char**,PetscInt*,PetscInt**);
11: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive(Mat,PetscInt,PetscInt**,PetscInt**,PetscInt*);
12: extern PetscErrorCode MatGetRow_MPIAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
13: extern PetscErrorCode MatRestoreRow_MPIAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17: PetscErrorCode MatIncreaseOverlap_MPIAIJ(Mat C,PetscInt imax,IS is[],PetscInt ov)
18: {
20: PetscInt i;
23: if (ov < 0) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
24: for (i=0; i<ov; ++i) {
25: MatIncreaseOverlap_MPIAIJ_Once(C,imax,is);
26: }
27: return(0);
28: }
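/*
   Illustrative sketch (not part of mpiov.c): this routine is not called directly;
   user code calls the public MatIncreaseOverlap(), which dispatches here for MPIAIJ
   matrices.  A typical use, e.g. growing a subdomain for an additive-Schwarz-style
   decomposition, is sketched below under the assumption that A is an assembled
   MPIAIJ matrix; error checking is omitted for brevity.
*/
#if 0
  Mat      A;                                   /* assembled MPIAIJ matrix (assumed) */
  IS       sub;
  PetscInt rstart,rend;

  MatGetOwnershipRange(A,&rstart,&rend);
  ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&sub);  /* start from the locally owned rows */
  MatIncreaseOverlap(A,1,&sub,2);               /* grow the index set by 2 levels of overlap */
  /* ... use the enlarged index set, e.g. with MatGetSubMatrices() ... */
  ISDestroy(&sub);
#endif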
30: /*
31: Sample message format:
32: If a processor A wants processor B to process some elements corresponding
33: to index sets is[1],is[5]
34: mesg [0] = 2 (no of index sets in the mesg)
35: -----------
36: mesg [1] = 1 => is[1]
37: mesg [2] = sizeof(is[1]);
38: -----------
39: mesg [3] = 5 => is[5]
40: mesg [4] = sizeof(is[5]);
41: -----------
42: mesg [5]
43: mesg [n] data(is[1])
44: -----------
45: mesg[n+1]
46: mesg[m] data(is[5])
47: -----------
49: Notes:
50: nrqs - no of requests sent (or to be sent out)
51: nrqr - no of requests received (which have to be, or have been, processed)
52: */
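/*
   Illustrative sketch (not part of mpiov.c): decoding a message laid out as described
   above.  mesg[0] is the number of index sets, followed by (IS number, length) pairs,
   followed by the concatenated index data.  The names mesg, nis, off, p, isno and len
   are hypothetical and used only for this sketch.
*/
#if 0
  PetscInt nis,off,p,isno,len;

  nis = mesg[0];                 /* number of index sets carried by this message */
  off = 2*nis + 1;               /* index data starts right after the header     */
  for (p=0; p<nis; p++) {
    isno = mesg[2*p+1];          /* which is[] the block belongs to              */
    len  = mesg[2*p+2];          /* number of indices sent for that index set    */
    /* process mesg[off] .. mesg[off+len-1] on behalf of is[isno] */
    off += len;
  }
#endif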
55: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat C,PetscInt imax,IS is[])
56: {
57: Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
58: PetscMPIInt *w1,*w2,nrqr,*w3,*w4,*onodes1,*olengths1,*onodes2,*olengths2;
59: const PetscInt **idx,*idx_i;
60: PetscInt *n,**data,len;
62: PetscMPIInt size,rank,tag1,tag2;
63: PetscInt M,i,j,k,**rbuf,row,proc = 0,nrqs,msz,**outdat,**ptr;
64: PetscInt *ctr,*pa,*tmp,*isz,*isz1,**xdata,**rbuf2,*d_p;
65: PetscBT *table;
66: MPI_Comm comm;
67: MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2;
68: MPI_Status *s_status,*recv_status;
69: char *t_p;
72: PetscObjectGetComm((PetscObject)C,&comm);
73: size = c->size;
74: rank = c->rank;
75: M = C->rmap->N;
77: PetscObjectGetNewTag((PetscObject)C,&tag1);
78: PetscObjectGetNewTag((PetscObject)C,&tag2);
80: PetscMalloc2(imax,&idx,imax,&n);
82: for (i=0; i<imax; i++) {
83: ISGetIndices(is[i],&idx[i]);
84: ISGetLocalSize(is[i],&n[i]);
85: }
87: /* evaluate communication - mesg to who, length of mesg, and buffer space
88: required. Based on this, buffers are allocated, and data copied into them*/
89: PetscMalloc4(size,&w1,size,&w2,size,&w3,size,&w4);
90: PetscMemzero(w1,size*sizeof(PetscMPIInt)); /* initialise work vector*/
91: PetscMemzero(w2,size*sizeof(PetscMPIInt)); /* initialise work vector*/
92: PetscMemzero(w3,size*sizeof(PetscMPIInt)); /* initialise work vector*/
93: for (i=0; i<imax; i++) {
94: PetscMemzero(w4,size*sizeof(PetscMPIInt)); /* initialise work vector*/
95: idx_i = idx[i];
96: len = n[i];
97: for (j=0; j<len; j++) {
98: row = idx_i[j];
99: if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index set cannot have negative entries");
100: PetscLayoutFindOwner(C->rmap,row,&proc);
101: w4[proc]++;
102: }
103: for (j=0; j<size; j++) {
104: if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
105: }
106: }
108: nrqs = 0; /* no of outgoing messages */
109: msz = 0; /* total mesg length (for all procs) */
110: w1[rank] = 0; /* no mesg sent to itself */
111: w3[rank] = 0;
112: for (i=0; i<size; i++) {
113: if (w1[i]) {w2[i] = 1; nrqs++;} /* there exists a message to proc i */
114: }
115: /* pa - is list of processors to communicate with */
116: PetscMalloc1((nrqs+1),&pa);
117: for (i=0,j=0; i<size; i++) {
118: if (w1[i]) {pa[j] = i; j++;}
119: }
121: /* Each message would have a header = 1 + 2*(no of IS) + data */
122: for (i=0; i<nrqs; i++) {
123: j = pa[i];
124: w1[j] += w2[j] + 2*w3[j];
125: msz += w1[j];
126: }
128: /* Determine the number of messages to expect, their lengths, and from-ids */
129: PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
130: PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
132: /* Now post the Irecvs corresponding to these messages */
133: PetscPostIrecvInt(comm,tag1,nrqr,onodes1,olengths1,&rbuf,&r_waits1);
135: /* Allocate Memory for outgoing messages */
136: PetscMalloc4(size,&outdat,size,&ptr,msz,&tmp,size,&ctr);
137: PetscMemzero(outdat,size*sizeof(PetscInt*));
138: PetscMemzero(ptr,size*sizeof(PetscInt*));
140: {
141: PetscInt *iptr = tmp,ict = 0;
142: for (i=0; i<nrqs; i++) {
143: j = pa[i];
144: iptr += ict;
145: outdat[j] = iptr;
146: ict = w1[j];
147: }
148: }
150: /* Form the outgoing messages */
151: /*plug in the headers*/
152: for (i=0; i<nrqs; i++) {
153: j = pa[i];
154: outdat[j][0] = 0;
155: PetscMemzero(outdat[j]+1,2*w3[j]*sizeof(PetscInt));
156: ptr[j] = outdat[j] + 2*w3[j] + 1;
157: }
159: /* Memory for doing local proc's work*/
160: {
161: PetscCalloc5(imax,&table, imax,&data, imax,&isz, M*imax,&d_p, (M/PETSC_BITS_PER_BYTE+1)*imax,&t_p);
163: for (i=0; i<imax; i++) {
164: table[i] = t_p + (M/PETSC_BITS_PER_BYTE+1)*i;
165: data[i] = d_p + M*i;
166: }
167: }
169: /* Parse the IS and update local tables and the outgoing buf with the data*/
170: {
171: PetscInt n_i,*data_i,isz_i,*outdat_j,ctr_j;
172: PetscBT table_i;
174: for (i=0; i<imax; i++) {
175: PetscMemzero(ctr,size*sizeof(PetscInt));
176: n_i = n[i];
177: table_i = table[i];
178: idx_i = idx[i];
179: data_i = data[i];
180: isz_i = isz[i];
181: for (j=0; j<n_i; j++) { /* parse the indices of each IS */
182: row = idx_i[j];
183: PetscLayoutFindOwner(C->rmap,row,&proc);
184: if (proc != rank) { /* copy to the outgoing buffer */
185: ctr[proc]++;
186: *ptr[proc] = row;
187: ptr[proc]++;
188: } else if (!PetscBTLookupSet(table_i,row)) data_i[isz_i++] = row; /* Update the local table */
189: }
190: /* Update the headers for the current IS */
191: for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
192: if ((ctr_j = ctr[j])) {
193: outdat_j = outdat[j];
194: k = ++outdat_j[0];
195: outdat_j[2*k] = ctr_j;
196: outdat_j[2*k-1] = i;
197: }
198: }
199: isz[i] = isz_i;
200: }
201: }
203: /* Now post the sends */
204: PetscMalloc1((nrqs+1),&s_waits1);
205: for (i=0; i<nrqs; ++i) {
206: j = pa[i];
207: MPI_Isend(outdat[j],w1[j],MPIU_INT,j,tag1,comm,s_waits1+i);
208: }
210: /* No longer need the original indices*/
211: for (i=0; i<imax; ++i) {
212: ISRestoreIndices(is[i],idx+i);
213: }
214: PetscFree2(idx,n);
216: for (i=0; i<imax; ++i) {
217: ISDestroy(&is[i]);
218: }
220: /* Do Local work*/
221: MatIncreaseOverlap_MPIAIJ_Local(C,imax,table,isz,data);
223: /* Receive messages*/
224: PetscMalloc1((nrqr+1),&recv_status);
225: if (nrqr) {MPI_Waitall(nrqr,r_waits1,recv_status);}
227: PetscMalloc1((nrqs+1),&s_status);
228: if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status);}
230: /* Phase 1 sends are complete - deallocate buffers */
231: PetscFree4(outdat,ptr,tmp,ctr);
232: PetscFree4(w1,w2,w3,w4);
234: PetscMalloc1((nrqr+1),&xdata);
235: PetscMalloc1((nrqr+1),&isz1);
236: MatIncreaseOverlap_MPIAIJ_Receive(C,nrqr,rbuf,xdata,isz1);
237: PetscFree(rbuf[0]);
238: PetscFree(rbuf);
241: /* Send the data back*/
242: /* Do a global reduction to know the buffer space req for incoming messages*/
243: {
244: PetscMPIInt *rw1;
246: PetscCalloc1(size,&rw1);
248: for (i=0; i<nrqr; ++i) {
249: proc = recv_status[i].MPI_SOURCE;
251: if (proc != onodes1[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPI_SOURCE mismatch");
252: rw1[proc] = isz1[i];
253: }
254: PetscFree(onodes1);
255: PetscFree(olengths1);
257: /* Determine the number of messages to expect, their lengths, and from-ids */
258: PetscGatherMessageLengths(comm,nrqr,nrqs,rw1,&onodes2,&olengths2);
259: PetscFree(rw1);
260: }
261: /* Now post the Irecvs corresponding to these messages */
262: PetscPostIrecvInt(comm,tag2,nrqs,onodes2,olengths2,&rbuf2,&r_waits2);
264: /* Now post the sends */
265: PetscMalloc1((nrqr+1),&s_waits2);
266: for (i=0; i<nrqr; ++i) {
267: j = recv_status[i].MPI_SOURCE;
268: MPI_Isend(xdata[i],isz1[i],MPIU_INT,j,tag2,comm,s_waits2+i);
269: }
271: /* receive work done on other processors*/
272: {
273: PetscInt is_no,ct1,max,*rbuf2_i,isz_i,*data_i,jmax;
274: PetscMPIInt idex;
275: PetscBT table_i;
276: MPI_Status *status2;
278: PetscMalloc((PetscMax(nrqr,nrqs)+1)*sizeof(MPI_Status),&status2);
279: for (i=0; i<nrqs; ++i) {
280: MPI_Waitany(nrqs,r_waits2,&idex,status2+i);
281: /* Process the message*/
282: rbuf2_i = rbuf2[idex];
283: ct1 = 2*rbuf2_i[0]+1;
284: jmax = rbuf2[idex][0];
285: for (j=1; j<=jmax; j++) {
286: max = rbuf2_i[2*j];
287: is_no = rbuf2_i[2*j-1];
288: isz_i = isz[is_no];
289: data_i = data[is_no];
290: table_i = table[is_no];
291: for (k=0; k<max; k++,ct1++) {
292: row = rbuf2_i[ct1];
293: if (!PetscBTLookupSet(table_i,row)) data_i[isz_i++] = row;
294: }
295: isz[is_no] = isz_i;
296: }
297: }
299: if (nrqr) {MPI_Waitall(nrqr,s_waits2,status2);}
300: PetscFree(status2);
301: }
303: for (i=0; i<imax; ++i) {
304: ISCreateGeneral(PETSC_COMM_SELF,isz[i],data[i],PETSC_COPY_VALUES,is+i);
305: }
307: PetscFree(onodes2);
308: PetscFree(olengths2);
310: PetscFree(pa);
311: PetscFree(rbuf2[0]);
312: PetscFree(rbuf2);
313: PetscFree(s_waits1);
314: PetscFree(r_waits1);
315: PetscFree(s_waits2);
316: PetscFree(r_waits2);
317: PetscFree5(table,data,isz,d_p,t_p);
318: PetscFree(s_status);
319: PetscFree(recv_status);
320: PetscFree(xdata[0]);
321: PetscFree(xdata);
322: PetscFree(isz1);
323: return(0);
324: }
328: /*
329: MatIncreaseOverlap_MPIAIJ_Local - Called by MatIncreaseOverlap, to do
330: the work on the local processor.
332: Inputs:
333: C - MAT_MPIAIJ;
334: imax - total no of index sets processed at a time;
335: table - an array of char - size = m bits.
337: Output:
338: isz - array containing the count of the solution elements corresponding
339: to each index set;
340: data - pointer to the solutions
341: */
342: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local(Mat C,PetscInt imax,PetscBT *table,PetscInt *isz,PetscInt **data)
343: {
344: Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
345: Mat A = c->A,B = c->B;
346: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
347: PetscInt start,end,val,max,rstart,cstart,*ai,*aj;
348: PetscInt *bi,*bj,*garray,i,j,k,row,*data_i,isz_i;
349: PetscBT table_i;
352: rstart = C->rmap->rstart;
353: cstart = C->cmap->rstart;
354: ai = a->i;
355: aj = a->j;
356: bi = b->i;
357: bj = b->j;
358: garray = c->garray;
361: for (i=0; i<imax; i++) {
362: data_i = data[i];
363: table_i = table[i];
364: isz_i = isz[i];
365: for (j=0,max=isz[i]; j<max; j++) {
366: row = data_i[j] - rstart;
367: start = ai[row];
368: end = ai[row+1];
369: for (k=start; k<end; k++) { /* Amat */
370: val = aj[k] + cstart;
371: if (!PetscBTLookupSet(table_i,val)) data_i[isz_i++] = val;
372: }
373: start = bi[row];
374: end = bi[row+1];
375: for (k=start; k<end; k++) { /* Bmat */
376: val = garray[bj[k]];
377: if (!PetscBTLookupSet(table_i,val)) data_i[isz_i++] = val;
378: }
379: }
380: isz[i] = isz_i;
381: }
382: return(0);
383: }
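/*
   Illustrative sketch (not part of mpiov.c): the deduplication idiom used above.
   PetscBTLookupSet() returns the previous value of the bit and then sets it, so a
   zero return means "first occurrence" and each index is appended exactly once.
   The names seen, found, nfound and candidates are hypothetical.
*/
#if 0
  PetscBT  seen;
  PetscInt found[64],candidates[5] = {3,7,3,12,7},nfound = 0,i;

  PetscBTCreate(64,&seen);                       /* one bit per possible global index */
  for (i=0; i<5; i++) {
    if (!PetscBTLookupSet(seen,candidates[i])) found[nfound++] = candidates[i];
  }
  /* found now holds {3,7,12} and nfound is 3 */
  PetscBTDestroy(&seen);
#endif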
387: /*
388: MatIncreaseOverlap_MPIAIJ_Receive - Process the received messages,
389: and return the output
391: Input:
392: C - the matrix
393: nrqr - no of messages being processed.
394: rbuf - an array of pointers to the received requests
396: Output:
397: xdata - array of messages to be sent back
398: isz1 - size of each message
400: For better efficiency perhaps we should malloc each xdata[i] separately;
401: then if a realloc is required we need only copy the data for that one row
402: rather than all previous rows, as it is now where a single large chunk of
403: memory is used.
405: */
406: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive(Mat C,PetscInt nrqr,PetscInt **rbuf,PetscInt **xdata,PetscInt * isz1)
407: {
408: Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
409: Mat A = c->A,B = c->B;
410: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
412: PetscInt rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k;
413: PetscInt row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end;
414: PetscInt val,max1,max2,m,no_malloc =0,*tmp,new_estimate,ctr;
415: PetscInt *rbuf_i,kmax,rbuf_0;
416: PetscBT xtable;
419: m = C->rmap->N;
420: rstart = C->rmap->rstart;
421: cstart = C->cmap->rstart;
422: ai = a->i;
423: aj = a->j;
424: bi = b->i;
425: bj = b->j;
426: garray = c->garray;
429: for (i=0,ct=0,total_sz=0; i<nrqr; ++i) {
430: rbuf_i = rbuf[i];
431: rbuf_0 = rbuf_i[0];
432: ct += rbuf_0;
433: for (j=1; j<=rbuf_0; j++) total_sz += rbuf_i[2*j];
434: }
436: if (C->rmap->n) max1 = ct*(a->nz + b->nz)/C->rmap->n;
437: else max1 = 1;
438: mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1);
439: PetscMalloc1(mem_estimate,&xdata[0]);
440: ++no_malloc;
441: PetscBTCreate(m,&xtable);
442: PetscMemzero(isz1,nrqr*sizeof(PetscInt));
444: ct3 = 0;
445: for (i=0; i<nrqr; i++) { /* for each mesg from proc i */
446: rbuf_i = rbuf[i];
447: rbuf_0 = rbuf_i[0];
448: ct1 = 2*rbuf_0+1;
449: ct2 = ct1;
450: ct3 += ct1;
451: for (j=1; j<=rbuf_0; j++) { /* for each IS from proc i*/
452: PetscBTMemzero(m,xtable);
453: oct2 = ct2;
454: kmax = rbuf_i[2*j];
455: for (k=0; k<kmax; k++,ct1++) {
456: row = rbuf_i[ct1];
457: if (!PetscBTLookupSet(xtable,row)) {
458: if (!(ct3 < mem_estimate)) {
459: new_estimate = (PetscInt)(1.5*mem_estimate)+1;
460: PetscMalloc1(new_estimate,&tmp);
461: PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
462: PetscFree(xdata[0]);
463: xdata[0] = tmp;
464: mem_estimate = new_estimate; ++no_malloc;
465: for (ctr=1; ctr<=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];
466: }
467: xdata[i][ct2++] = row;
468: ct3++;
469: }
470: }
471: for (k=oct2,max2=ct2; k<max2; k++) {
472: row = xdata[i][k] - rstart;
473: start = ai[row];
474: end = ai[row+1];
475: for (l=start; l<end; l++) {
476: val = aj[l] + cstart;
477: if (!PetscBTLookupSet(xtable,val)) {
478: if (!(ct3 < mem_estimate)) {
479: new_estimate = (PetscInt)(1.5*mem_estimate)+1;
480: PetscMalloc1(new_estimate,&tmp);
481: PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
482: PetscFree(xdata[0]);
483: xdata[0] = tmp;
484: mem_estimate = new_estimate; ++no_malloc;
485: for (ctr=1; ctr<=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];
486: }
487: xdata[i][ct2++] = val;
488: ct3++;
489: }
490: }
491: start = bi[row];
492: end = bi[row+1];
493: for (l=start; l<end; l++) {
494: val = garray[bj[l]];
495: if (!PetscBTLookupSet(xtable,val)) {
496: if (!(ct3 < mem_estimate)) {
497: new_estimate = (PetscInt)(1.5*mem_estimate)+1;
498: PetscMalloc1(new_estimate,&tmp);
499: PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
500: PetscFree(xdata[0]);
501: xdata[0] = tmp;
502: mem_estimate = new_estimate; ++no_malloc;
503: for (ctr =1; ctr <=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];
504: }
505: xdata[i][ct2++] = val;
506: ct3++;
507: }
508: }
509: }
510: /* Update the header*/
511: xdata[i][2*j] = ct2 - oct2; /* Undo the vector isz1 and use only a var*/
512: xdata[i][2*j-1] = rbuf_i[2*j-1];
513: }
514: xdata[i][0] = rbuf_0;
515: xdata[i+1] = xdata[i] + ct2;
516: isz1[i] = ct2; /* size of each message */
517: }
518: PetscBTDestroy(&xtable);
519: PetscInfo3(C,"Allocated %D bytes, required %D bytes, no of mallocs = %D\n",mem_estimate,ct3,no_malloc);
520: return(0);
521: }
522: /* -------------------------------------------------------------------------*/
523: extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
524: extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
525: /*
526: Every processor gets the entire matrix
527: */
530: PetscErrorCode MatGetSubMatrix_MPIAIJ_All(Mat A,MatGetSubMatrixOption flag,MatReuse scall,Mat *Bin[])
531: {
532: Mat B;
533: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
534: Mat_SeqAIJ *b,*ad = (Mat_SeqAIJ*)a->A->data,*bd = (Mat_SeqAIJ*)a->B->data;
536: PetscMPIInt size,rank,*recvcounts = 0,*displs = 0;
537: PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j;
538: PetscInt m,*b_sendj,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
539: MatScalar *sendbuf,*recvbuf,*a_sendbuf,*b_sendbuf;
542: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
543: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
545: if (scall == MAT_INITIAL_MATRIX) {
546: /* ----------------------------------------------------------------
547: Tell every processor the number of nonzeros per row
548: */
549: PetscMalloc1(A->rmap->N,&lens);
550: for (i=A->rmap->rstart; i<A->rmap->rend; i++) {
551: lens[i] = ad->i[i-A->rmap->rstart+1] - ad->i[i-A->rmap->rstart] + bd->i[i-A->rmap->rstart+1] - bd->i[i-A->rmap->rstart];
552: }
553: sendcount = A->rmap->rend - A->rmap->rstart;
554: PetscMalloc2(size,&recvcounts,size,&displs);
555: for (i=0; i<size; i++) {
556: recvcounts[i] = A->rmap->range[i+1] - A->rmap->range[i];
557: displs[i] = A->rmap->range[i];
558: }
559: #if defined(PETSC_HAVE_MPI_IN_PLACE)
560: MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
561: #else
562: MPI_Allgatherv(lens+A->rmap->rstart,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
563: #endif
564: /* ---------------------------------------------------------------
565: Create the sequential matrix of the same type as the local block diagonal
566: */
567: MatCreate(PETSC_COMM_SELF,&B);
568: MatSetSizes(B,A->rmap->N,A->cmap->N,PETSC_DETERMINE,PETSC_DETERMINE);
569: MatSetBlockSizesFromMats(B,A,A);
570: MatSetType(B,((PetscObject)a->A)->type_name);
571: MatSeqAIJSetPreallocation(B,0,lens);
572: PetscMalloc(sizeof(Mat),Bin);
573: **Bin = B;
574: b = (Mat_SeqAIJ*)B->data;
576: /*--------------------------------------------------------------------
577: Copy my part of matrix column indices over
578: */
579: sendcount = ad->nz + bd->nz;
580: jsendbuf = b->j + b->i[rstarts[rank]];
581: a_jsendbuf = ad->j;
582: b_jsendbuf = bd->j;
583: n = A->rmap->rend - A->rmap->rstart;
584: cnt = 0;
585: for (i=0; i<n; i++) {
587: /* put in lower diagonal portion */
588: m = bd->i[i+1] - bd->i[i];
589: while (m > 0) {
590: /* is it above diagonal (in bd (compressed) numbering) */
591: if (garray[*b_jsendbuf] > A->rmap->rstart + i) break;
592: jsendbuf[cnt++] = garray[*b_jsendbuf++];
593: m--;
594: }
596: /* put in diagonal portion */
597: for (j=ad->i[i]; j<ad->i[i+1]; j++) {
598: jsendbuf[cnt++] = A->rmap->rstart + *a_jsendbuf++;
599: }
601: /* put in upper diagonal portion */
602: while (m-- > 0) {
603: jsendbuf[cnt++] = garray[*b_jsendbuf++];
604: }
605: }
606: if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
608: /*--------------------------------------------------------------------
609: Gather all column indices to all processors
610: */
611: for (i=0; i<size; i++) {
612: recvcounts[i] = 0;
613: for (j=A->rmap->range[i]; j<A->rmap->range[i+1]; j++) {
614: recvcounts[i] += lens[j];
615: }
616: }
617: displs[0] = 0;
618: for (i=1; i<size; i++) {
619: displs[i] = displs[i-1] + recvcounts[i-1];
620: }
621: #if defined(PETSC_HAVE_MPI_IN_PLACE)
622: MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
623: #else
624: MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
625: #endif
626: /*--------------------------------------------------------------------
627: Assemble the matrix into usable form (note numerical values not yet set)
628: */
629: /* set the b->ilen (length of each row) values */
630: PetscMemcpy(b->ilen,lens,A->rmap->N*sizeof(PetscInt));
631: /* set the b->i indices */
632: b->i[0] = 0;
633: for (i=1; i<=A->rmap->N; i++) {
634: b->i[i] = b->i[i-1] + lens[i-1];
635: }
636: PetscFree(lens);
637: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
638: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
640: } else {
641: B = **Bin;
642: b = (Mat_SeqAIJ*)B->data;
643: }
645: /*--------------------------------------------------------------------
646: Copy my part of matrix numerical values into the values location
647: */
648: if (flag == MAT_GET_VALUES) {
649: sendcount = ad->nz + bd->nz;
650: sendbuf = b->a + b->i[rstarts[rank]];
651: a_sendbuf = ad->a;
652: b_sendbuf = bd->a;
653: b_sendj = bd->j;
654: n = A->rmap->rend - A->rmap->rstart;
655: cnt = 0;
656: for (i=0; i<n; i++) {
658: /* put in lower diagonal portion */
659: m = bd->i[i+1] - bd->i[i];
660: while (m > 0) {
661: /* is it above diagonal (in bd (compressed) numbering) */
662: if (garray[*b_sendj] > A->rmap->rstart + i) break;
663: sendbuf[cnt++] = *b_sendbuf++;
664: m--;
665: b_sendj++;
666: }
668: /* put in diagonal portion */
669: for (j=ad->i[i]; j<ad->i[i+1]; j++) {
670: sendbuf[cnt++] = *a_sendbuf++;
671: }
673: /* put in upper diagonal portion */
674: while (m-- > 0) {
675: sendbuf[cnt++] = *b_sendbuf++;
676: b_sendj++;
677: }
678: }
679: if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
681: /* -----------------------------------------------------------------
682: Gather all numerical values to all processors
683: */
684: if (!recvcounts) {
685: PetscMalloc2(size,&recvcounts,size,&displs);
686: }
687: for (i=0; i<size; i++) {
688: recvcounts[i] = b->i[rstarts[i+1]] - b->i[rstarts[i]];
689: }
690: displs[0] = 0;
691: for (i=1; i<size; i++) {
692: displs[i] = displs[i-1] + recvcounts[i-1];
693: }
694: recvbuf = b->a;
695: #if defined(PETSC_HAVE_MPI_IN_PLACE)
696: MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,recvbuf,recvcounts,displs,MPIU_SCALAR,PetscObjectComm((PetscObject)A));
697: #else
698: MPI_Allgatherv(sendbuf,sendcount,MPIU_SCALAR,recvbuf,recvcounts,displs,MPIU_SCALAR,PetscObjectComm((PetscObject)A));
699: #endif
700: } /* end of (flag == MAT_GET_VALUES) */
701: PetscFree2(recvcounts,displs);
703: if (A->symmetric) {
704: MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
705: } else if (A->hermitian) {
706: MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);
707: } else if (A->structurally_symmetric) {
708: MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
709: }
710: return(0);
711: }
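/*
   Illustrative sketch (not part of mpiov.c): the "entire matrix on every process" path
   above is normally reached through MatGetSubMatrices() with identity row and column
   index sets spanning the whole matrix.  A is assumed to be an assembled MPIAIJ matrix;
   error checking is omitted for brevity.
*/
#if 0
  Mat      A,*Aseq;              /* Aseq[0] will hold a full sequential copy of A */
  IS       allrows,allcols;
  PetscInt M,N;

  MatGetSize(A,&M,&N);
  ISCreateStride(PETSC_COMM_SELF,M,0,1,&allrows);
  ISCreateStride(PETSC_COMM_SELF,N,0,1,&allcols);
  MatGetSubMatrices(A,1,&allrows,&allcols,MAT_INITIAL_MATRIX,&Aseq);
  /* ... every rank now owns the whole matrix in Aseq[0] ... */
  MatDestroyMatrices(1,&Aseq);
  ISDestroy(&allrows);
  ISDestroy(&allcols);
#endif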
717: PetscErrorCode MatGetSubMatrices_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
718: {
720: PetscInt nmax,nstages_local,nstages,i,pos,max_no,nrow,ncol;
721: PetscBool rowflag,colflag,wantallmatrix=PETSC_FALSE,twantallmatrix,*allcolumns;
724: /* Currently, unsorted column indices will result in inverted column indices in the resulting submatrices. */
725: /* It would make sense to error out in MatGetSubMatrices_MPIAIJ_Local(), the most impl-specific level.
726: However, there are more careful users of MatGetSubMatrices_MPIAIJ_Local() -- MatPermute_MPIAIJ() -- that
727: take care to order the result correctly by assembling it with MatSetValues() (after preallocating).
728: */
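  /* Illustrative note (not part of mpiov.c): callers that do not care about the column
     ordering of the result can sort the index set first with ISSort(iscol[i]); callers
     that need a specific (unsorted) ordering must reorder the result themselves, as
     MatPermute_MPIAIJ() does by reassembling with MatSetValues(). */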
729: for (i = 0; i < ismax; ++i) {
730: PetscBool sorted;
731: ISSorted(iscol[i], &sorted);
732: if (!sorted) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_SUP, "Column index set %D not sorted", i);
733: }
735: /*
736: Check for special case: each processor gets entire matrix
737: */
738: if (ismax == 1 && C->rmap->N == C->cmap->N) {
739: ISIdentity(*isrow,&rowflag);
740: ISIdentity(*iscol,&colflag);
741: ISGetLocalSize(*isrow,&nrow);
742: ISGetLocalSize(*iscol,&ncol);
743: if (rowflag && colflag && nrow == C->rmap->N && ncol == C->cmap->N) {
744: wantallmatrix = PETSC_TRUE;
746: PetscOptionsGetBool(((PetscObject)C)->prefix,"-use_fast_submatrix",&wantallmatrix,NULL);
747: }
748: }
749: MPI_Allreduce(&wantallmatrix,&twantallmatrix,1,MPIU_BOOL,MPI_MIN,PetscObjectComm((PetscObject)C));
750: if (twantallmatrix) {
751: MatGetSubMatrix_MPIAIJ_All(C,MAT_GET_VALUES,scall,submat);
752: return(0);
753: }
755: /* Allocate memory to hold all the submatrices */
756: if (scall != MAT_REUSE_MATRIX) {
757: PetscMalloc1((ismax+1),submat);
758: }
760: /* Check for special case: each processor gets entire matrix columns */
761: PetscMalloc1((ismax+1),&allcolumns);
762: for (i=0; i<ismax; i++) {
763: ISIdentity(iscol[i],&colflag);
764: ISGetLocalSize(iscol[i],&ncol);
765: if (colflag && ncol == C->cmap->N) {
766: allcolumns[i] = PETSC_TRUE;
767: } else {
768: allcolumns[i] = PETSC_FALSE;
769: }
770: }
772: /* Determine the number of stages through which submatrices are done */
773: nmax = 20*1000000 / (C->cmap->N * sizeof(PetscInt));
775: /*
776: Each stage will extract nmax submatrices.
777: nmax is determined by the matrix column dimension.
778: If the original matrix has 20M columns, only one submatrix per stage is allowed, etc.
779: */
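  /* Worked example (illustrative, not part of mpiov.c): with C->cmap->N = 1,000,000 columns
     and 4-byte PetscInt, nmax = 20*1000000/(1000000*4) = 5, i.e. at most five submatrices
     are extracted per stage. */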
780: if (!nmax) nmax = 1;
781: nstages_local = ismax/nmax + ((ismax % nmax) ? 1 : 0);
783: /* Make sure every processor loops through the nstages */
784: MPI_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)C));
786: for (i=0,pos=0; i<nstages; i++) {
787: if (pos+nmax <= ismax) max_no = nmax;
788: else if (pos == ismax) max_no = 0;
789: else max_no = ismax-pos;
790: MatGetSubMatrices_MPIAIJ_Local(C,max_no,isrow+pos,iscol+pos,scall,allcolumns+pos,*submat+pos);
791: pos += max_no;
792: }
794: PetscFree(allcolumns);
795: return(0);
796: }
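/*
   Illustrative sketch (not part of mpiov.c): typical (non fast-path) usage, in which each
   process extracts its own sequential submatrix of C defined by global row and column
   index sets.  C is assumed to be an assembled MPIAIJ matrix with at least 10 columns;
   the column IS is a stride and hence sorted, as required by the check above; error
   checking is omitted for brevity.
*/
#if 0
  Mat      C,*submats;
  IS       rows,cols;
  PetscInt rstart,rend;

  MatGetOwnershipRange(C,&rstart,&rend);
  ISCreateStride(PETSC_COMM_SELF,PetscMin(5,rend-rstart),rstart,1,&rows);  /* a few local rows */
  ISCreateStride(PETSC_COMM_SELF,10,0,1,&cols);                            /* first 10 columns */
  MatGetSubMatrices(C,1,&rows,&cols,MAT_INITIAL_MATRIX,&submats);
  /* ... work with submats[0], a SeqAIJ matrix ... */
  MatDestroyMatrices(1,&submats);
  ISDestroy(&rows);
  ISDestroy(&cols);
#endif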
798: /* -------------------------------------------------------------------------*/
801: PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,PetscBool *allcolumns,Mat *submats)
802: {
803: Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
804: Mat A = c->A;
805: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)c->B->data,*mat;
806: const PetscInt **icol,**irow;
807: PetscInt *nrow,*ncol,start;
809: PetscMPIInt rank,size,tag0,tag1,tag2,tag3,*w1,*w2,*w3,*w4,nrqr;
810: PetscInt **sbuf1,**sbuf2,i,j,k,l,ct1,ct2,**rbuf1,row,proc;
811: PetscInt nrqs,msz,**ptr,*req_size,*ctr,*pa,*tmp,tcol;
812: PetscInt **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2;
813: PetscInt **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax;
814: #if defined(PETSC_USE_CTABLE)
815: PetscTable *cmap,cmap_i=NULL,*rmap,rmap_i;
816: #else
817: PetscInt **cmap,*cmap_i=NULL,**rmap,*rmap_i;
818: #endif
819: const PetscInt *irow_i;
820: PetscInt ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i;
821: MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
822: MPI_Request *r_waits4,*s_waits3,*s_waits4;
823: MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
824: MPI_Status *r_status3,*r_status4,*s_status4;
825: MPI_Comm comm;
826: PetscScalar **rbuf4,**sbuf_aa,*vals,*mat_a,*sbuf_aa_i;
827: PetscMPIInt *onodes1,*olengths1;
828: PetscMPIInt idex,idex2,end;
831: PetscObjectGetComm((PetscObject)C,&comm);
832: tag0 = ((PetscObject)C)->tag;
833: size = c->size;
834: rank = c->rank;
836: /* Get some new tags to keep the communication clean */
837: PetscObjectGetNewTag((PetscObject)C,&tag1);
838: PetscObjectGetNewTag((PetscObject)C,&tag2);
839: PetscObjectGetNewTag((PetscObject)C,&tag3);
841: PetscMalloc4(ismax,&irow,ismax,&icol,ismax,&nrow,ismax,&ncol);
843: for (i=0; i<ismax; i++) {
844: ISGetIndices(isrow[i],&irow[i]);
845: ISGetLocalSize(isrow[i],&nrow[i]);
846: if (allcolumns[i]) {
847: icol[i] = NULL;
848: ncol[i] = C->cmap->N;
849: } else {
850: ISGetIndices(iscol[i],&icol[i]);
851: ISGetLocalSize(iscol[i],&ncol[i]);
852: }
853: }
855: /* evaluate communication - mesg to who, length of mesg, and buffer space
856: required. Based on this, buffers are allocated, and data copied into them*/
857: PetscMalloc4(size,&w1,size,&w2,size,&w3,size,&w4); /* mesg size */
858: PetscMemzero(w1,size*sizeof(PetscMPIInt)); /* initialize work vector*/
859: PetscMemzero(w2,size*sizeof(PetscMPIInt)); /* initialize work vector*/
860: PetscMemzero(w3,size*sizeof(PetscMPIInt)); /* initialize work vector*/
861: for (i=0; i<ismax; i++) {
862: PetscMemzero(w4,size*sizeof(PetscMPIInt)); /* initialize work vector*/
863: jmax = nrow[i];
864: irow_i = irow[i];
865: for (j=0; j<jmax; j++) {
866: l = 0;
867: row = irow_i[j];
868: while (row >= C->rmap->range[l+1]) l++;
869: proc = l;
870: w4[proc]++;
871: }
872: for (j=0; j<size; j++) {
873: if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
874: }
875: }
877: nrqs = 0; /* no of outgoing messages */
878: msz = 0; /* total mesg length (for all procs) */
879: w1[rank] = 0; /* no mesg sent to self */
880: w3[rank] = 0;
881: for (i=0; i<size; i++) {
882: if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
883: }
884: PetscMalloc1((nrqs+1),&pa); /*(proc -array)*/
885: for (i=0,j=0; i<size; i++) {
886: if (w1[i]) { pa[j] = i; j++; }
887: }
889: /* Each message would have a header = 1 + 2*(no of IS) + data */
890: for (i=0; i<nrqs; i++) {
891: j = pa[i];
892: w1[j] += w2[j] + 2* w3[j];
893: msz += w1[j];
894: }
895: PetscInfo2(0,"Number of outgoing messages %D Total message length %D\n",nrqs,msz);
897: /* Determine the number of messages to expect, their lengths, and from-ids */
898: PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
899: PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
901: /* Now post the Irecvs corresponding to these messages */
902: PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
904: PetscFree(onodes1);
905: PetscFree(olengths1);
907: /* Allocate Memory for outgoing messages */
908: PetscMalloc4(size,&sbuf1,size,&ptr,2*msz,&tmp,size,&ctr);
909: PetscMemzero(sbuf1,size*sizeof(PetscInt*));
910: PetscMemzero(ptr,size*sizeof(PetscInt*));
912: {
913: PetscInt *iptr = tmp,ict = 0;
914: for (i=0; i<nrqs; i++) {
915: j = pa[i];
916: iptr += ict;
917: sbuf1[j] = iptr;
918: ict = w1[j];
919: }
920: }
922: /* Form the outgoing messages */
923: /* Initialize the header space */
924: for (i=0; i<nrqs; i++) {
925: j = pa[i];
926: sbuf1[j][0] = 0;
927: PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));
928: ptr[j] = sbuf1[j] + 2*w3[j] + 1;
929: }
931: /* Parse the isrow and copy data into outbuf */
932: for (i=0; i<ismax; i++) {
933: PetscMemzero(ctr,size*sizeof(PetscInt));
934: irow_i = irow[i];
935: jmax = nrow[i];
936: for (j=0; j<jmax; j++) { /* parse the indices of each IS */
937: l = 0;
938: row = irow_i[j];
939: while (row >= C->rmap->range[l+1]) l++;
940: proc = l;
941: if (proc != rank) { /* copy to the outgoing buf*/
942: ctr[proc]++;
943: *ptr[proc] = row;
944: ptr[proc]++;
945: }
946: }
947: /* Update the headers for the current IS */
948: for (j=0; j<size; j++) { /* Can Optimise this loop too */
949: if ((ctr_j = ctr[j])) {
950: sbuf1_j = sbuf1[j];
951: k = ++sbuf1_j[0];
952: sbuf1_j[2*k] = ctr_j;
953: sbuf1_j[2*k-1] = i;
954: }
955: }
956: }
958: /* Now post the sends */
959: PetscMalloc1((nrqs+1),&s_waits1);
960: for (i=0; i<nrqs; ++i) {
961: j = pa[i];
962: MPI_Isend(sbuf1[j],w1[j],MPIU_INT,j,tag0,comm,s_waits1+i);
963: }
965: /* Post Receives to capture the buffer size */
966: PetscMalloc1((nrqs+1),&r_waits2);
967: PetscMalloc1((nrqs+1),&rbuf2);
968: rbuf2[0] = tmp + msz;
969: for (i=1; i<nrqs; ++i) {
970: rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
971: }
972: for (i=0; i<nrqs; ++i) {
973: j = pa[i];
974: MPI_Irecv(rbuf2[i],w1[j],MPIU_INT,j,tag1,comm,r_waits2+i);
975: }
977: /* Send to other procs the buf size they should allocate */
980: /* Receive messages*/
981: PetscMalloc1((nrqr+1),&s_waits2);
982: PetscMalloc1((nrqr+1),&r_status1);
983: PetscMalloc3(nrqr,&sbuf2,nrqr,&req_size,nrqr,&req_source);
984: {
985: Mat_SeqAIJ *sA = (Mat_SeqAIJ*)c->A->data,*sB = (Mat_SeqAIJ*)c->B->data;
986: PetscInt *sAi = sA->i,*sBi = sB->i,id,rstart = C->rmap->rstart;
987: PetscInt *sbuf2_i;
989: for (i=0; i<nrqr; ++i) {
990: MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);
992: req_size[idex] = 0;
993: rbuf1_i = rbuf1[idex];
994: start = 2*rbuf1_i[0] + 1;
995: MPI_Get_count(r_status1+i,MPIU_INT,&end);
996: PetscMalloc1((end+1),&sbuf2[idex]);
997: sbuf2_i = sbuf2[idex];
998: for (j=start; j<end; j++) {
999: id = rbuf1_i[j] - rstart;
1000: ncols = sAi[id+1] - sAi[id] + sBi[id+1] - sBi[id];
1001: sbuf2_i[j] = ncols;
1002: req_size[idex] += ncols;
1003: }
1004: req_source[idex] = r_status1[i].MPI_SOURCE;
1005: /* form the header */
1006: sbuf2_i[0] = req_size[idex];
1007: for (j=1; j<start; j++) sbuf2_i[j] = rbuf1_i[j];
1009: MPI_Isend(sbuf2_i,end,MPIU_INT,req_source[idex],tag1,comm,s_waits2+i);
1010: }
1011: }
1012: PetscFree(r_status1);
1013: PetscFree(r_waits1);
1015: /* recv buffer sizes */
1016: /* Receive messages*/
1018: PetscMalloc1((nrqs+1),&rbuf3);
1019: PetscMalloc1((nrqs+1),&rbuf4);
1020: PetscMalloc1((nrqs+1),&r_waits3);
1021: PetscMalloc1((nrqs+1),&r_waits4);
1022: PetscMalloc1((nrqs+1),&r_status2);
1024: for (i=0; i<nrqs; ++i) {
1025: MPI_Waitany(nrqs,r_waits2,&idex,r_status2+i);
1026: PetscMalloc1((rbuf2[idex][0]+1),&rbuf3[idex]);
1027: PetscMalloc1((rbuf2[idex][0]+1),&rbuf4[idex]);
1028: MPI_Irecv(rbuf3[idex],rbuf2[idex][0],MPIU_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idex);
1029: MPI_Irecv(rbuf4[idex],rbuf2[idex][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idex);
1030: }
1031: PetscFree(r_status2);
1032: PetscFree(r_waits2);
1034: /* Wait on sends1 and sends2 */
1035: PetscMalloc1((nrqs+1),&s_status1);
1036: PetscMalloc1((nrqr+1),&s_status2);
1038: if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status1);}
1039: if (nrqr) {MPI_Waitall(nrqr,s_waits2,s_status2);}
1040: PetscFree(s_status1);
1041: PetscFree(s_status2);
1042: PetscFree(s_waits1);
1043: PetscFree(s_waits2);
1045: /* Now allocate buffers for a->j, and send them off */
1046: PetscMalloc1((nrqr+1),&sbuf_aj);
1047: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
1048: PetscMalloc1((j+1),&sbuf_aj[0]);
1049: for (i=1; i<nrqr; i++) sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];
1051: PetscMalloc1((nrqr+1),&s_waits3);
1052: {
1053: PetscInt nzA,nzB,*a_i = a->i,*b_i = b->i,lwrite;
1054: PetscInt *cworkA,*cworkB,cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray;
1055: PetscInt cend = C->cmap->rend;
1056: PetscInt *a_j = a->j,*b_j = b->j,ctmp;
1058: for (i=0; i<nrqr; i++) {
1059: rbuf1_i = rbuf1[i];
1060: sbuf_aj_i = sbuf_aj[i];
1061: ct1 = 2*rbuf1_i[0] + 1;
1062: ct2 = 0;
1063: for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
1064: kmax = rbuf1[i][2*j];
1065: for (k=0; k<kmax; k++,ct1++) {
1066: row = rbuf1_i[ct1] - rstart;
1067: nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row];
1068: ncols = nzA + nzB;
1069: cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row];
1071: /* load the column indices for this row into cols*/
1072: cols = sbuf_aj_i + ct2;
1074: lwrite = 0;
1075: for (l=0; l<nzB; l++) {
1076: if ((ctmp = bmap[cworkB[l]]) < cstart) cols[lwrite++] = ctmp;
1077: }
1078: for (l=0; l<nzA; l++) cols[lwrite++] = cstart + cworkA[l];
1079: for (l=0; l<nzB; l++) {
1080: if ((ctmp = bmap[cworkB[l]]) >= cend) cols[lwrite++] = ctmp;
1081: }
1083: ct2 += ncols;
1084: }
1085: }
1086: MPI_Isend(sbuf_aj_i,req_size[i],MPIU_INT,req_source[i],tag2,comm,s_waits3+i);
1087: }
1088: }
1089: PetscMalloc1((nrqs+1),&r_status3);
1090: PetscMalloc1((nrqr+1),&s_status3);
1092: /* Allocate buffers for a->a, and send them off */
1093: PetscMalloc1((nrqr+1),&sbuf_aa);
1094: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
1095: PetscMalloc1((j+1),&sbuf_aa[0]);
1096: for (i=1; i<nrqr; i++) sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1];
1098: PetscMalloc1((nrqr+1),&s_waits4);
1099: {
1100: PetscInt nzA,nzB,*a_i = a->i,*b_i = b->i, *cworkB,lwrite;
1101: PetscInt cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray;
1102: PetscInt cend = C->cmap->rend;
1103: PetscInt *b_j = b->j;
1104: PetscScalar *vworkA,*vworkB,*a_a = a->a,*b_a = b->a;
1106: for (i=0; i<nrqr; i++) {
1107: rbuf1_i = rbuf1[i];
1108: sbuf_aa_i = sbuf_aa[i];
1109: ct1 = 2*rbuf1_i[0]+1;
1110: ct2 = 0;
1111: for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
1112: kmax = rbuf1_i[2*j];
1113: for (k=0; k<kmax; k++,ct1++) {
1114: row = rbuf1_i[ct1] - rstart;
1115: nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row];
1116: ncols = nzA + nzB;
1117: cworkB = b_j + b_i[row];
1118: vworkA = a_a + a_i[row];
1119: vworkB = b_a + b_i[row];
1121: /* load the column values for this row into vals*/
1122: vals = sbuf_aa_i+ct2;
1124: lwrite = 0;
1125: for (l=0; l<nzB; l++) {
1126: if ((bmap[cworkB[l]]) < cstart) vals[lwrite++] = vworkB[l];
1127: }
1128: for (l=0; l<nzA; l++) vals[lwrite++] = vworkA[l];
1129: for (l=0; l<nzB; l++) {
1130: if ((bmap[cworkB[l]]) >= cend) vals[lwrite++] = vworkB[l];
1131: }
1133: ct2 += ncols;
1134: }
1135: }
1136: MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
1137: }
1138: }
1139: PetscFree(rbuf1[0]);
1140: PetscFree(rbuf1);
1141: PetscMalloc1((nrqs+1),&r_status4);
1142: PetscMalloc1((nrqr+1),&s_status4);
1144: /* Form the matrix */
1145: /* create col map: global col of C -> local col of submatrices */
1146: {
1147: const PetscInt *icol_i;
1148: #if defined(PETSC_USE_CTABLE)
1149: PetscMalloc1((1+ismax),&cmap);
1150: for (i=0; i<ismax; i++) {
1151: if (!allcolumns[i]) {
1152: PetscTableCreate(ncol[i]+1,C->cmap->N+1,&cmap[i]);
1154: jmax = ncol[i];
1155: icol_i = icol[i];
1156: cmap_i = cmap[i];
1157: for (j=0; j<jmax; j++) {
1158: PetscTableAdd(cmap[i],icol_i[j]+1,j+1,INSERT_VALUES);
1159: }
1160: } else {
1161: cmap[i] = NULL;
1162: }
1163: }
1164: #else
1165: PetscMalloc1(ismax,&cmap);
1166: for (i=0; i<ismax; i++) {
1167: if (!allcolumns[i]) {
1168: PetscMalloc1(C->cmap->N,&cmap[i]);
1169: PetscMemzero(cmap[i],C->cmap->N*sizeof(PetscInt));
1170: jmax = ncol[i];
1171: icol_i = icol[i];
1172: cmap_i = cmap[i];
1173: for (j=0; j<jmax; j++) {
1174: cmap_i[icol_i[j]] = j+1;
1175: }
1176: } else {
1177: cmap[i] = NULL;
1178: }
1179: }
1180: #endif
1181: }
1183: /* Create lens which is required for MatCreate... */
1184: for (i=0,j=0; i<ismax; i++) j += nrow[i];
1185: PetscMalloc1(ismax,&lens);
1186: if (ismax) {
1187: PetscMalloc1(j,&lens[0]);
1188: PetscMemzero(lens[0],j*sizeof(PetscInt));
1189: }
1190: for (i=1; i<ismax; i++) lens[i] = lens[i-1] + nrow[i-1];
1192: /* Update lens from local data */
1193: for (i=0; i<ismax; i++) {
1194: jmax = nrow[i];
1195: if (!allcolumns[i]) cmap_i = cmap[i];
1196: irow_i = irow[i];
1197: lens_i = lens[i];
1198: for (j=0; j<jmax; j++) {
1199: l = 0;
1200: row = irow_i[j];
1201: while (row >= C->rmap->range[l+1]) l++;
1202: proc = l;
1203: if (proc == rank) {
1204: MatGetRow_MPIAIJ(C,row,&ncols,&cols,0);
1205: if (!allcolumns[i]) {
1206: for (k=0; k<ncols; k++) {
1207: #if defined(PETSC_USE_CTABLE)
1208: PetscTableFind(cmap_i,cols[k]+1,&tcol);
1209: #else
1210: tcol = cmap_i[cols[k]];
1211: #endif
1212: if (tcol) lens_i[j]++;
1213: }
1214: } else { /* allcolumns */
1215: lens_i[j] = ncols;
1216: }
1217: MatRestoreRow_MPIAIJ(C,row,&ncols,&cols,0);
1218: }
1219: }
1220: }
1222: /* Create row map: global row of C -> local row of submatrices */
1223: #if defined(PETSC_USE_CTABLE)
1224: PetscMalloc1((1+ismax),&rmap);
1225: for (i=0; i<ismax; i++) {
1226: PetscTableCreate(nrow[i]+1,C->rmap->N+1,&rmap[i]);
1227: rmap_i = rmap[i];
1228: irow_i = irow[i];
1229: jmax = nrow[i];
1230: for (j=0; j<jmax; j++) {
1231: PetscTableAdd(rmap[i],irow_i[j]+1,j+1,INSERT_VALUES);
1232: }
1233: }
1234: #else
1235: PetscMalloc1(ismax,&rmap);
1236: if (ismax) {
1237: PetscMalloc1(ismax*C->rmap->N,&rmap[0]);
1238: PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));
1239: }
1240: for (i=1; i<ismax; i++) rmap[i] = rmap[i-1] + C->rmap->N;
1241: for (i=0; i<ismax; i++) {
1242: rmap_i = rmap[i];
1243: irow_i = irow[i];
1244: jmax = nrow[i];
1245: for (j=0; j<jmax; j++) {
1246: rmap_i[irow_i[j]] = j;
1247: }
1248: }
1249: #endif
1251: /* Update lens from offproc data */
1252: {
1253: PetscInt *rbuf2_i,*rbuf3_i,*sbuf1_i;
1255: for (tmp2=0; tmp2<nrqs; tmp2++) {
1256: MPI_Waitany(nrqs,r_waits3,&idex2,r_status3+tmp2);
1257: idex = pa[idex2];
1258: sbuf1_i = sbuf1[idex];
1259: jmax = sbuf1_i[0];
1260: ct1 = 2*jmax+1;
1261: ct2 = 0;
1262: rbuf2_i = rbuf2[idex2];
1263: rbuf3_i = rbuf3[idex2];
1264: for (j=1; j<=jmax; j++) {
1265: is_no = sbuf1_i[2*j-1];
1266: max1 = sbuf1_i[2*j];
1267: lens_i = lens[is_no];
1268: if (!allcolumns[is_no]) cmap_i = cmap[is_no];
1269: rmap_i = rmap[is_no];
1270: for (k=0; k<max1; k++,ct1++) {
1271: #if defined(PETSC_USE_CTABLE)
1272: PetscTableFind(rmap_i,sbuf1_i[ct1]+1,&row);
1273: row--;
1274: if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table");
1275: #else
1276: row = rmap_i[sbuf1_i[ct1]]; /* the val in the new matrix to be */
1277: #endif
1278: max2 = rbuf2_i[ct1];
1279: for (l=0; l<max2; l++,ct2++) {
1280: if (!allcolumns[is_no]) {
1281: #if defined(PETSC_USE_CTABLE)
1282: PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);
1283: #else
1284: tcol = cmap_i[rbuf3_i[ct2]];
1285: #endif
1286: if (tcol) lens_i[row]++;
1287: } else { /* allcolumns */
1288: lens_i[row]++; /* lens_i[row] += max2 ? */
1289: }
1290: }
1291: }
1292: }
1293: }
1294: }
1295: PetscFree(r_status3);
1296: PetscFree(r_waits3);
1297: if (nrqr) {MPI_Waitall(nrqr,s_waits3,s_status3);}
1298: PetscFree(s_status3);
1299: PetscFree(s_waits3);
1301: /* Create the submatrices */
1302: if (scall == MAT_REUSE_MATRIX) {
1303: PetscBool flag;
1305: /*
1306: Assumes new rows are same length as the old rows, hence bug!
1307: */
1308: for (i=0; i<ismax; i++) {
1309: mat = (Mat_SeqAIJ*)(submats[i]->data);
1310: if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
1311: PetscMemcmp(mat->ilen,lens[i],submats[i]->rmap->n*sizeof(PetscInt),&flag);
1312: if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
1313: /* Initialize the matrix as if empty */
1314: PetscMemzero(mat->ilen,submats[i]->rmap->n*sizeof(PetscInt));
1316: submats[i]->factortype = C->factortype;
1317: }
1318: } else {
1319: for (i=0; i<ismax; i++) {
1320: PetscInt rbs,cbs;
1321: ISGetBlockSize(isrow[i],&rbs);
1322: ISGetBlockSize(iscol[i],&cbs);
1324: MatCreate(PETSC_COMM_SELF,submats+i);
1325: MatSetSizes(submats[i],nrow[i],ncol[i],PETSC_DETERMINE,PETSC_DETERMINE);
1327: MatSetBlockSizes(submats[i],rbs,cbs);
1328: MatSetType(submats[i],((PetscObject)A)->type_name);
1329: MatSeqAIJSetPreallocation(submats[i],0,lens[i]);
1330: }
1331: }
1333: /* Assemble the matrices */
1334: /* First assemble the local rows */
1335: {
1336: PetscInt ilen_row,*imat_ilen,*imat_j,*imat_i,old_row;
1337: PetscScalar *imat_a;
1339: for (i=0; i<ismax; i++) {
1340: mat = (Mat_SeqAIJ*)submats[i]->data;
1341: imat_ilen = mat->ilen;
1342: imat_j = mat->j;
1343: imat_i = mat->i;
1344: imat_a = mat->a;
1346: if (!allcolumns[i]) cmap_i = cmap[i];
1347: rmap_i = rmap[i];
1348: irow_i = irow[i];
1349: jmax = nrow[i];
1350: for (j=0; j<jmax; j++) {
1351: l = 0;
1352: row = irow_i[j];
1353: while (row >= C->rmap->range[l+1]) l++;
1354: proc = l;
1355: if (proc == rank) {
1356: old_row = row;
1357: #if defined(PETSC_USE_CTABLE)
1358: PetscTableFind(rmap_i,row+1,&row);
1359: row--;
1360: #else
1361: row = rmap_i[row];
1362: #endif
1363: ilen_row = imat_ilen[row];
1364: MatGetRow_MPIAIJ(C,old_row,&ncols,&cols,&vals);
1365: mat_i = imat_i[row];
1366: mat_a = imat_a + mat_i;
1367: mat_j = imat_j + mat_i;
1368: if (!allcolumns[i]) {
1369: for (k=0; k<ncols; k++) {
1370: #if defined(PETSC_USE_CTABLE)
1371: PetscTableFind(cmap_i,cols[k]+1,&tcol);
1372: #else
1373: tcol = cmap_i[cols[k]];
1374: #endif
1375: if (tcol) {
1376: *mat_j++ = tcol - 1;
1377: *mat_a++ = vals[k];
1378: ilen_row++;
1379: }
1380: }
1381: } else { /* allcolumns */
1382: for (k=0; k<ncols; k++) {
1383: *mat_j++ = cols[k]; /* global col index! */
1384: *mat_a++ = vals[k];
1385: ilen_row++;
1386: }
1387: }
1388: MatRestoreRow_MPIAIJ(C,old_row,&ncols,&cols,&vals);
1390: imat_ilen[row] = ilen_row;
1391: }
1392: }
1393: }
1394: }
1396: /* Now assemble the off proc rows*/
1397: {
1398: PetscInt *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen;
1399: PetscInt *imat_j,*imat_i;
1400: PetscScalar *imat_a,*rbuf4_i;
1402: for (tmp2=0; tmp2<nrqs; tmp2++) {
1403: MPI_Waitany(nrqs,r_waits4,&idex2,r_status4+tmp2);
1404: idex = pa[idex2];
1405: sbuf1_i = sbuf1[idex];
1406: jmax = sbuf1_i[0];
1407: ct1 = 2*jmax + 1;
1408: ct2 = 0;
1409: rbuf2_i = rbuf2[idex2];
1410: rbuf3_i = rbuf3[idex2];
1411: rbuf4_i = rbuf4[idex2];
1412: for (j=1; j<=jmax; j++) {
1413: is_no = sbuf1_i[2*j-1];
1414: rmap_i = rmap[is_no];
1415: if (!allcolumns[is_no]) cmap_i = cmap[is_no];
1416: mat = (Mat_SeqAIJ*)submats[is_no]->data;
1417: imat_ilen = mat->ilen;
1418: imat_j = mat->j;
1419: imat_i = mat->i;
1420: imat_a = mat->a;
1421: max1 = sbuf1_i[2*j];
1422: for (k=0; k<max1; k++,ct1++) {
1423: row = sbuf1_i[ct1];
1424: #if defined(PETSC_USE_CTABLE)
1425: PetscTableFind(rmap_i,row+1,&row);
1426: row--;
1427: #else
1428: row = rmap_i[row];
1429: #endif
1430: ilen = imat_ilen[row];
1431: mat_i = imat_i[row];
1432: mat_a = imat_a + mat_i;
1433: mat_j = imat_j + mat_i;
1434: max2 = rbuf2_i[ct1];
1435: if (!allcolumns[is_no]) {
1436: for (l=0; l<max2; l++,ct2++) {
1438: #if defined(PETSC_USE_CTABLE)
1439: PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);
1440: #else
1441: tcol = cmap_i[rbuf3_i[ct2]];
1442: #endif
1443: if (tcol) {
1444: *mat_j++ = tcol - 1;
1445: *mat_a++ = rbuf4_i[ct2];
1446: ilen++;
1447: }
1448: }
1449: } else { /* allcolumns */
1450: for (l=0; l<max2; l++,ct2++) {
1451: *mat_j++ = rbuf3_i[ct2]; /* same global column index of C */
1452: *mat_a++ = rbuf4_i[ct2];
1453: ilen++;
1454: }
1455: }
1456: imat_ilen[row] = ilen;
1457: }
1458: }
1459: }
1460: }
1462: PetscFree(r_status4);
1463: PetscFree(r_waits4);
1464: if (nrqr) {MPI_Waitall(nrqr,s_waits4,s_status4);}
1465: PetscFree(s_waits4);
1466: PetscFree(s_status4);
1468: /* Restore the indices */
1469: for (i=0; i<ismax; i++) {
1470: ISRestoreIndices(isrow[i],irow+i);
1471: if (!allcolumns[i]) {
1472: ISRestoreIndices(iscol[i],icol+i);
1473: }
1474: }
1476: /* Destroy allocated memory */
1477: PetscFree4(irow,icol,nrow,ncol);
1478: PetscFree4(w1,w2,w3,w4);
1479: PetscFree(pa);
1481: PetscFree4(sbuf1,ptr,tmp,ctr);
1482: PetscFree(rbuf2);
1483: for (i=0; i<nrqr; ++i) {
1484: PetscFree(sbuf2[i]);
1485: }
1486: for (i=0; i<nrqs; ++i) {
1487: PetscFree(rbuf3[i]);
1488: PetscFree(rbuf4[i]);
1489: }
1491: PetscFree3(sbuf2,req_size,req_source);
1492: PetscFree(rbuf3);
1493: PetscFree(rbuf4);
1494: PetscFree(sbuf_aj[0]);
1495: PetscFree(sbuf_aj);
1496: PetscFree(sbuf_aa[0]);
1497: PetscFree(sbuf_aa);
1499: #if defined(PETSC_USE_CTABLE)
1500: for (i=0; i<ismax; i++) {PetscTableDestroy((PetscTable*)&rmap[i]);}
1501: #else
1502: if (ismax) {PetscFree(rmap[0]);}
1503: #endif
1504: PetscFree(rmap);
1506: for (i=0; i<ismax; i++) {
1507: if (!allcolumns[i]) {
1508: #if defined(PETSC_USE_CTABLE)
1509: PetscTableDestroy((PetscTable*)&cmap[i]);
1510: #else
1511: PetscFree(cmap[i]);
1512: #endif
1513: }
1514: }
1515: PetscFree(cmap);
1516: if (ismax) {PetscFree(lens[0]);}
1517: PetscFree(lens);
1519: for (i=0; i<ismax; i++) {
1520: MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);
1521: MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);
1522: }
1523: return(0);
1524: }
1526: /*
1527: Observe that the Seq matrices used to construct this MPI matrix are not increfed.
1528: Be careful not to destroy them elsewhere.
1529: */
1532: PetscErrorCode MatCreateMPIAIJFromSeqMatrices_Private(MPI_Comm comm, Mat A, Mat B, Mat *C)
1533: {
1534: /* If making this function public, change the error returned in this function away from _PLIB. */
1536: Mat_MPIAIJ *aij;
1537: PetscBool seqaij;
1540: /* Check to make sure the component matrices are compatible with C. */
1541: PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &seqaij);
1542: if (!seqaij) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Diagonal matrix is of wrong type");
1543: PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &seqaij);
1544: if (!seqaij) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Off-diagonal matrix is of wrong type");
1545: if (PetscAbs(A->rmap->n) != PetscAbs(B->rmap->n) || PetscAbs(A->rmap->bs) != PetscAbs(B->rmap->bs) || PetscAbs(A->cmap->bs) != PetscAbs(B->cmap->bs)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incompatible component matrices of an MPIAIJ matrix");
1547: MatCreate(comm, C);
1548: MatSetSizes(*C,A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE);
1549: MatSetBlockSizesFromMats(*C,A,A);
1550: PetscLayoutSetUp((*C)->rmap);
1551: PetscLayoutSetUp((*C)->cmap);
1552: if ((*C)->cmap->N != A->cmap->n + B->cmap->n) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incompatible component matrices of an MPIAIJ matrix");
1553: MatSetType(*C, MATMPIAIJ);
1554: aij = (Mat_MPIAIJ*)((*C)->data);
1555: aij->A = A;
1556: aij->B = B;
1557: PetscLogObjectParent((PetscObject)*C,(PetscObject)A);
1558: PetscLogObjectParent((PetscObject)*C,(PetscObject)B);
1560: (*C)->preallocated = (PetscBool)(A->preallocated && B->preallocated);
1561: (*C)->assembled = (PetscBool)(A->assembled && B->assembled);
1562: return(0);
1563: }
1567: PetscErrorCode MatMPIAIJExtractSeqMatrices_Private(Mat C, Mat *A, Mat *B)
1568: {
1569: Mat_MPIAIJ *aij = (Mat_MPIAIJ*) (C->data);
1574: *A = aij->A;
1575: *B = aij->B;
1576: /* Note that we don't incref *A and *B, so be careful! */
1577: return(0);
1578: }
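/*
   Illustrative sketch (not part of mpiov.c): the two private helpers above are near
   inverses -- one wraps an existing (diag, off-diag) pair of SeqAIJ matrices into an
   MPIAIJ matrix without copying or increfing them, the other hands the pair back.
   Because the pieces are not increfed, destroying the wrapper also destroys them, so
   the caller must not destroy them again.  All variable names are hypothetical.
*/
#if 0
  Mat Adiag,Aoff,P,Adiag2,Aoff2;

  /* Adiag and Aoff are SeqAIJ matrices with compatible local sizes */
  MatCreateMPIAIJFromSeqMatrices_Private(PETSC_COMM_WORLD,Adiag,Aoff,&P);
  MatMPIAIJExtractSeqMatrices_Private(P,&Adiag2,&Aoff2);   /* Adiag2 == Adiag, Aoff2 == Aoff */
  MatDestroy(&P);                                          /* also destroys Adiag and Aoff  */
#endif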
1582: PetscErrorCode MatGetSubMatricesParallel_MPIXAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[],
1583: PetscErrorCode(*getsubmats_seq)(Mat, PetscInt, const IS[], const IS[], MatReuse, Mat**),
1584: PetscErrorCode(*makefromseq)(MPI_Comm, Mat, Mat,Mat*),
1585: PetscErrorCode(*extractseq)(Mat, Mat*, Mat*))
1586: {
1588: PetscMPIInt size, flag;
1589: PetscInt i,ii;
1590: PetscInt ismax_c;
1593: if (!ismax) return(0);
1595: for (i = 0, ismax_c = 0; i < ismax; ++i) {
1596: MPI_Comm_compare(((PetscObject)isrow[i])->comm,((PetscObject)iscol[i])->comm, &flag);
1597: if (flag != MPI_IDENT) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column index sets must have the same communicator");
1598: MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1599: if (size > 1) ++ismax_c;
1600: }
1601: if (!ismax_c) { /* Sequential ISs only, so can call the sequential matrix extraction subroutine. */
1602: (*getsubmats_seq)(C,ismax,isrow,iscol,scall,submat);
1603: } else { /* if (ismax_c) */
1604: Mat *A,*B;
1605: IS *isrow_c, *iscol_c;
1606: PetscMPIInt size;
1607: /*
1608: Allocate the necessary arrays to hold the resulting parallel matrices as well as the intermediate
1609: array of sequential matrices underlying the resulting parallel matrices.
1610: Which arrays to allocate is based on the value of MatReuse scall.
1611: There are as many diag matrices as there are original index sets.
1612: There are only as many parallel and off-diag matrices as there are parallel (comm size > 1) index sets.
1614: Sequential matrix arrays are allocated in any event: even if the array of parallel matrices already exists,
1615: we need to consolidate the underlying seq matrices into a single array to serve as placeholders into which
1616: getsubmats_seq will deposit the extracted diag and off-diag parts.
1617: However, if reuse is taking place, we have to allocate the seq matrix arrays here.
1618: If reuse is NOT taking place, then the seq matrix arrays are allocated by getsubmats_seq.
1619: */
1621: /* Parallel matrix array is allocated only if no reuse is taking place. */
1622: if (scall != MAT_REUSE_MATRIX) {
1623: PetscMalloc1((ismax),submat);
1624: } else {
1625: PetscMalloc1(ismax, &A);
1626: PetscMalloc1(ismax_c, &B);
1627: /* If parallel matrices are being reused, then simply reuse the underlying seq matrices as well. */
1628: for (i = 0, ii = 0; i < ismax; ++i) {
1629: MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1630: if (size > 1) {
1631: (*extractseq)((*submat)[i],A+i,B+ii);
1632: ++ii;
1633: } else A[i] = (*submat)[i];
1634: }
1635: }
1636: /*
1637: Construct the complements of the iscol ISs for parallel ISs only.
1638: These are used to extract the off-diag portion of the resulting parallel matrix.
1639: The row IS for the off-diag portion is the same as for the diag portion,
1640: so we merely alias the row IS, while skipping those that are sequential.
1641: */
1642: PetscMalloc2(ismax_c,&isrow_c, ismax_c, &iscol_c);
1643: for (i = 0, ii = 0; i < ismax; ++i) {
1644: MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1645: if (size > 1) {
1646: isrow_c[ii] = isrow[i];
1648: ISGetNonlocalIS(iscol[i], &(iscol_c[ii]));
1649: ++ii;
1650: }
1651: }
1652: /* Now obtain the sequential A and B submatrices separately. */
1653: (*getsubmats_seq)(C,ismax,isrow, iscol,scall, &A);
1654: (*getsubmats_seq)(C,ismax_c,isrow_c, iscol_c,scall, &B);
1655: for (ii = 0; ii < ismax_c; ++ii) {
1656: ISDestroy(&iscol_c[ii]);
1657: }
1658: PetscFree2(isrow_c, iscol_c);
1659: /*
1660: If scall == MAT_REUSE_MATRIX, we are done, since the sequential matrices A & B
1661: have been extracted directly into the parallel matrices containing them, or
1662: simply into the sequential matrix identical with the corresponding A (if size == 1).
1663: Otherwise, make sure that parallel matrices are constructed from A & B, or that
1664: A is put into the correct submat slot (if size == 1).
1665: */
1666: if (scall != MAT_REUSE_MATRIX) {
1667: for (i = 0, ii = 0; i < ismax; ++i) {
1668: MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1669: if (size > 1) {
1670: /*
1671: For each parallel isrow[i], create parallel matrices from the extracted sequential matrices.
1672: */
1673: /* Construct submat[i] from the Seq pieces A and B. */
1674: (*makefromseq)(((PetscObject)isrow[i])->comm, A[i], B[ii], (*submat)+i);
1676: ++ii;
1677: } else (*submat)[i] = A[i];
1678: }
1679: }
1680: PetscFree(A);
1681: PetscFree(B);
1682: }
1683: return(0);
1684: } /* MatGetSubMatricesParallel_MPIXAIJ() */
1690: PetscErrorCode MatGetSubMatricesParallel_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
1691: {
1695: MatGetSubMatricesParallel_MPIXAIJ(C,ismax,isrow,iscol,scall,submat,MatGetSubMatrices_MPIAIJ,MatCreateMPIAIJFromSeqMatrices_Private,MatMPIAIJExtractSeqMatrices_Private);
1696: return(0);
1697: }