Actual source code: ex183.c

petsc-3.14.6 2021-03-30
Report Typos and Errors
  1: static char help[] = "Example of extracting an array of MPI submatrices from a given MPI matrix.\n"
  2:   "This test can only be run in parallel.\n"
  3:   "\n";

  5: /*T
  6:    Concepts: Mat^mat submatrix, parallel
  7:    Processors: n
  8: T*/


 11: #include <petscmat.h>

 13: int main(int argc, char **args)
 14: {
 15:   Mat             A,*submats;
 16:   MPI_Comm        subcomm;
 17:   PetscMPIInt     rank,size,subrank,subsize,color;
 18:   PetscInt        m,n,N,bs,rstart,rend,i,j,k,total_subdomains,hash,nsubdomains=1;
 19:   PetscInt        nis,*cols,gnsubdomains,gsubdomainnums[1],gsubdomainperm[1],s,gs;
 20:   PetscInt        *rowindices,*colindices,idx,rep;
 21:   PetscScalar     *vals;
 22:   IS              rowis[1],colis[1];
 23:   PetscViewer     viewer;
 24:   PetscBool       permute_indices,flg;
 25:   PetscErrorCode  ierr;

 27:   PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
 28:   MPI_Comm_size(PETSC_COMM_WORLD,&size);
 29:   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);

 31:   PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"ex183","Mat");
 32:   m = 5;
 33:   PetscOptionsInt("-m","Local matrix size","MatSetSizes",m,&m,&flg);
 34:   total_subdomains = size-1;
 35:   PetscOptionsInt("-total_subdomains","Number of submatrices where 0 < n < comm size","MatCreateSubMatricesMPI",total_subdomains,&total_subdomains,&flg);
 36:   permute_indices = PETSC_FALSE;
 37:   PetscOptionsBool("-permute_indices","Whether to permute indices before breaking them into subdomains","ISCreateGeneral",permute_indices,&permute_indices,&flg);
 38:   hash = 7;
 39:   PetscOptionsInt("-hash","Permutation factor, which has to be relatively prime to M = size*m (total matrix size)","ISCreateGeneral",hash,&hash,&flg);
 40:   rep = 2;
 41:   PetscOptionsInt("-rep","Number of times to carry out submatrix extractions; currently only 1 & 2 are supported",NULL,rep,&rep,&flg);
 42:   PetscOptionsEnd();

 44:   if (total_subdomains > size) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"Number of subdomains %D must not exceed comm size %D",total_subdomains,size);
 45:   if (total_subdomains < 1 || total_subdomains > size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of subdomains must be > 0 and <= %D (comm size), got total_subdomains = %D",size,total_subdomains);
 46:   if (rep != 1 && rep != 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid number of test repetitions: %D; must be 1 or 2",rep);

 48:   viewer = PETSC_VIEWER_STDOUT_WORLD;
 49:   /* Create logically sparse, but effectively dense matrix for easy verification of submatrix extraction correctness. */
 50:   MatCreate(PETSC_COMM_WORLD,&A);
 51:   MatSetSizes(A,m,m,PETSC_DECIDE,PETSC_DECIDE);
 52:   MatSetFromOptions(A);
 53:   MatSetUp(A);
 54:   MatGetSize(A,NULL,&N);
 55:   MatGetLocalSize(A,NULL,&n);
 56:   MatGetBlockSize(A,&bs);
 57:   MatSeqAIJSetPreallocation(A,n,NULL);
 58:   MatMPIAIJSetPreallocation(A,n,NULL,N-n,NULL);
 59:   MatSeqBAIJSetPreallocation(A,bs,n/bs,NULL);
 60:   MatMPIBAIJSetPreallocation(A,bs,n/bs,NULL,(N-n)/bs,NULL);
 61:   MatSeqSBAIJSetPreallocation(A,bs,n/bs,NULL);
 62:   MatMPISBAIJSetPreallocation(A,bs,n/bs,NULL,(N-n)/bs,NULL);

 64:   PetscMalloc2(N,&cols,N,&vals);
 65:   MatGetOwnershipRange(A,&rstart,&rend);
 66:   for (j = 0; j < N; ++j) cols[j] = j;
 67:   for (i=rstart; i<rend; i++) {
 68:     for (j=0;j<N;++j) {
 69:       vals[j] = i*10000+j;
 70:     }
 71:     MatSetValues(A,1,&i,N,cols,vals,INSERT_VALUES);
 72:   }
 73:   PetscFree2(cols,vals);
 74:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
 75:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

 77:   PetscViewerASCIIPrintf(viewer,"Initial matrix:\n");
 78:   MatView(A,viewer);


 81:   /*
 82:      Create subcomms and ISs so that each rank participates in one IS.
 83:      The IS either coalesces adjacent rank indices (contiguous),
 84:      or selects indices by scrambling them using a hash.
 85:   */
 86:   k = size/total_subdomains + (size%total_subdomains>0); /* There are up to k ranks to a color */
 87:   color = rank/k;
 88:   MPI_Comm_split(PETSC_COMM_WORLD,color,rank,&subcomm);
 89:   MPI_Comm_size(subcomm,&subsize);
 90:   MPI_Comm_rank(subcomm,&subrank);
 91:   MatGetOwnershipRange(A,&rstart,&rend);
 92:   nis = 1;
 93:   PetscMalloc2(rend-rstart,&rowindices,rend-rstart,&colindices);

 95:   for (j = rstart; j < rend; ++j) {
 96:     if (permute_indices) {
 97:       idx = (j*hash);
 98:     } else {
 99:       idx = j;
100:     }
101:     rowindices[j-rstart] = idx%N;
102:     colindices[j-rstart] = (idx+m)%N;
103:   }
104:   ISCreateGeneral(subcomm,rend-rstart,rowindices,PETSC_COPY_VALUES,&rowis[0]);
105:   ISCreateGeneral(subcomm,rend-rstart,colindices,PETSC_COPY_VALUES,&colis[0]);
106:   ISSort(rowis[0]);
107:   ISSort(colis[0]);
108:   PetscFree2(rowindices,colindices);
109:   /*
110:     Now view the ISs.  To avoid deadlock when viewing a list of objects on different subcomms,
111:     we need to obtain the global numbers of our local objects and wait for the corresponding global
112:     number to be viewed.
113:   */
114:   PetscViewerASCIIPrintf(viewer,"Subdomains");
115:   if (permute_indices) {
116:     PetscViewerASCIIPrintf(viewer," (hash=%D)",hash);
117:   }
118:   PetscViewerASCIIPrintf(viewer,":\n");
119:   PetscViewerFlush(viewer);

121:   nsubdomains = 1;
122:   for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s;
123:   PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)rowis,&gnsubdomains,gsubdomainnums);
124:   PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm);
125:   for (gs=0,s=0; gs < gnsubdomains;++gs) {
126:     if (s < nsubdomains) {
127:       PetscInt ss;
128:       ss = gsubdomainperm[s];
129:       if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */
130:         PetscViewer subviewer = NULL;
131:         PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)rowis[ss]),&subviewer);
132:         PetscViewerASCIIPrintf(subviewer,"Row IS %D\n",gs);
133:         ISView(rowis[ss],subviewer);
134:         PetscViewerFlush(subviewer);
135:         PetscViewerASCIIPrintf(subviewer,"Col IS %D\n",gs);
136:         ISView(colis[ss],subviewer);
137:         PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)rowis[ss]),&subviewer);
138:         ++s;
139:       }
140:     }
141:     MPI_Barrier(PETSC_COMM_WORLD);
142:   }
143:   PetscViewerFlush(viewer);
144:   ISSort(rowis[0]);
145:   ISSort(colis[0]);
146:   nsubdomains = 1;
147:   MatCreateSubMatricesMPI(A,nsubdomains,rowis,colis,MAT_INITIAL_MATRIX,&submats);
148:   /*
149:     Now view the matrices.  To avoid deadlock when viewing a list of objects on different subcomms,
150:     we need to obtain the global numbers of our local objects and wait for the corresponding global
151:     number to be viewed.
152:   */
153:   PetscViewerASCIIPrintf(viewer,"Submatrices (repetition 1):\n");
154:   for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s;
155:   PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)submats,&gnsubdomains,gsubdomainnums);
156:   PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm);
157:   for (gs=0,s=0; gs < gnsubdomains;++gs) {
158:     if (s < nsubdomains) {
159:       PetscInt ss;
160:       ss = gsubdomainperm[s];
161:       if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */
162:         PetscViewer subviewer = NULL;
163:         PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);
164:         MatView(submats[ss],subviewer);
165:         PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);
166:         ++s;
167:       }
168:     }
169:     MPI_Barrier(PETSC_COMM_WORLD);
170:   }
171:   PetscViewerFlush(viewer);
172:   if (rep == 1) goto cleanup;
173:   nsubdomains = 1;
174:   MatCreateSubMatricesMPI(A,nsubdomains,rowis,colis,MAT_REUSE_MATRIX,&submats);
175:   /*
176:     Now view the matrices.  To avoid deadlock when viewing a list of objects on different subcomms,
177:     we need to obtain the global numbers of our local objects and wait for the corresponding global
178:     number to be viewed.
179:   */
180:   PetscViewerASCIIPrintf(viewer,"Submatrices (repetition 2):\n");
181:   for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s;
182:   PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)submats,&gnsubdomains,gsubdomainnums);
183:   PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm);
184:   for (gs=0,s=0; gs < gnsubdomains;++gs) {
185:     if (s < nsubdomains) {
186:       PetscInt ss;
187:       ss = gsubdomainperm[s];
188:       if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */
189:         PetscViewer subviewer = NULL;
190:         PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);
191:         MatView(submats[ss],subviewer);
192:         PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);
193:         ++s;
194:       }
195:     }
196:     MPI_Barrier(PETSC_COMM_WORLD);
197:   }
198:   PetscViewerFlush(viewer);
199:   cleanup:
200:   for (k=0;k<nsubdomains;++k) {
201:     MatDestroy(submats+k);
202:   }
203:   PetscFree(submats);
204:   for (k=0;k<nis;++k) {
205:     ISDestroy(rowis+k);
206:     ISDestroy(colis+k);
207:   }
208:   MatDestroy(&A);
209:   MPI_Comm_free(&subcomm);
210:   PetscFinalize();
211:   return ierr;
212: }


215: /*TEST

217:    test:
218:       nsize: 2
219:       args: -total_subdomains 1
220:       output_file: output/ex183_2_1.out

222:    test:
223:       suffix: 2
224:       nsize: 3
225:       args: -total_subdomains 2
226:       output_file: output/ex183_3_2.out

228:    test:
229:       suffix: 3
230:       nsize: 4
231:       args: -total_subdomains 2
232:       output_file: output/ex183_4_2.out

234:    test:
235:       suffix: 4
236:       nsize: 6
237:       args: -total_subdomains 2
238:       output_file: output/ex183_6_2.out

240: TEST*/