Actual source code: fdmpiaij.c
petsc-3.6.1 2015-08-06
2: #include <../src/mat/impls/aij/mpi/mpiaij.h>
3: #include <../src/mat/impls/baij/mpi/mpibaij.h>
4: #include <petsc/private/isimpl.h>
8: PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,void *sctx)
9: {
10: PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
11: PetscErrorCode ierr;
12: PetscInt k,cstart,cend,l,row,col,nz,spidx,i,j;
13: PetscScalar dx=0.0,*w3_array,*dy_i,*dy=coloring->dy;
14: PetscScalar *vscale_array;
15: const PetscScalar *xx;
16: PetscReal epsilon=coloring->error_rel,umin=coloring->umin,unorm;
17: Vec w1=coloring->w1,w2=coloring->w2,w3,vscale=coloring->vscale;
18: void *fctx=coloring->fctx;
19: PetscInt ctype=coloring->ctype,nxloc,nrows_k;
20: PetscScalar *valaddr;
21: MatEntry *Jentry=coloring->matentry;
22: MatEntry2 *Jentry2=coloring->matentry2;
23: const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
24: PetscInt bs=J->rmap->bs;
27: /* (1) Set w1 = F(x1) */
28: if (!coloring->fset) {
29: PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
30: (*f)(sctx,x1,w1,fctx);
31: PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
32: } else {
33: coloring->fset = PETSC_FALSE;
34: }
36: /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */
37: VecGetLocalSize(x1,&nxloc);
38: if (coloring->htype[0] == 'w') {
39: /* vscale = dx is a constant scalar */
40: VecNorm(x1,NORM_2,&unorm);
41: dx = 1.0/(PetscSqrtReal(1.0 + unorm)*epsilon);
42: } else {
43: VecGetArrayRead(x1,&xx);
44: VecGetArray(vscale,&vscale_array);
45: for (col=0; col<nxloc; col++) {
46: dx = xx[col];
47: if (PetscAbsScalar(dx) < umin) {
48: if (PetscRealPart(dx) >= 0.0) dx = umin;
49: else if (PetscRealPart(dx) < 0.0 ) dx = -umin;
50: }
51: dx *= epsilon;
52: vscale_array[col] = 1.0/dx;
53: }
54: VecRestoreArrayRead(x1,&xx);
55: VecRestoreArray(vscale,&vscale_array);
56: }
57: if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') {
58: VecGhostUpdateBegin(vscale,INSERT_VALUES,SCATTER_FORWARD);
59: VecGhostUpdateEnd(vscale,INSERT_VALUES,SCATTER_FORWARD);
60: }
62: /* (3) Loop over each color */
63: if (!coloring->w3) {
64: VecDuplicate(x1,&coloring->w3);
65: PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);
66: }
67: w3 = coloring->w3;
69: VecGetOwnershipRange(x1,&cstart,&cend); /* used by ghosted vscale */
70: if (vscale) {
71: VecGetArray(vscale,&vscale_array);
72: }
73: nz = 0;
74: for (k=0; k<ncolors; k++) {
75: coloring->currentcolor = k;
77: /*
78: (3-1) Loop over each column associated with color
79: adding the perturbation to the vector w3 = x1 + dx.
80: */
81: VecCopy(x1,w3);
82: dy_i = dy;
83: for (i=0; i<bs; i++) { /* Loop over a block of columns */
84: VecGetArray(w3,&w3_array);
85: if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
86: if (coloring->htype[0] == 'w') {
87: for (l=0; l<ncolumns[k]; l++) {
88: col = i + bs*coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
89: w3_array[col] += 1.0/dx;
90: if (i) w3_array[col-1] -= 1.0/dx; /* resume original w3[col-1] */
91: }
92: } else { /* htype == 'ds' */
93: vscale_array -= cstart; /* shift pointer so global index can be used */
94: for (l=0; l<ncolumns[k]; l++) {
95: col = i + bs*coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
96: w3_array[col] += 1.0/vscale_array[col];
97: if (i) w3_array[col-1] -= 1.0/vscale_array[col-1]; /* resume original w3[col-1] */
98: }
99: vscale_array += cstart;
100: }
101: if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
102: VecRestoreArray(w3,&w3_array);
104: /*
105: (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
106: w2 = F(x1 + dx) - F(x1)
107: */
108: PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
109: VecPlaceArray(w2,dy_i); /* place w2 to the array dy_i */
110: (*f)(sctx,w3,w2,fctx);
111: PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
112: VecAXPY(w2,-1.0,w1);
113: VecResetArray(w2);
114: dy_i += nxloc; /* points to dy+i*nxloc */
115: }
117: /*
118: (3-3) Loop over rows of vector, putting results into Jacobian matrix
119: */
120: nrows_k = nrows[k];
121: if (coloring->htype[0] == 'w') {
122: for (l=0; l<nrows_k; l++) {
123: row = bs*Jentry2[nz].row; /* local row index */
124: valaddr = Jentry2[nz++].valaddr;
125: spidx = 0;
126: dy_i = dy;
127: for (i=0; i<bs; i++) { /* column of the block */
128: for (j=0; j<bs; j++) { /* row of the block */
129: valaddr[spidx++] = dy_i[row+j]*dx;
130: }
131: dy_i += nxloc; /* points to dy+i*nxloc */
132: }
133: }
134: } else { /* htype == 'ds' */
135: for (l=0; l<nrows_k; l++) {
136: row = bs*Jentry[nz].row; /* local row index */
137: col = bs*Jentry[nz].col; /* local column index */
138: valaddr = Jentry[nz++].valaddr;
139: spidx = 0;
140: dy_i = dy;
141: for (i=0; i<bs; i++) { /* column of the block */
142: for (j=0; j<bs; j++) { /* row of the block */
143: valaddr[spidx++] = dy_i[row+j]*vscale_array[col+i];
144: }
145: dy_i += nxloc; /* points to dy+i*nxloc */
146: }
147: }
148: }
149: }
150: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
151: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
152: if (vscale) {
153: VecRestoreArray(vscale,&vscale_array);
154: }
156: coloring->currentcolor = -1;
157: return(0);
158: }
162: PetscErrorCode MatFDColoringApply_AIJ(Mat J,MatFDColoring coloring,Vec x1,void *sctx)
163: {
164: PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
165: PetscErrorCode ierr;
166: PetscInt k,cstart,cend,l,row,col,nz;
167: PetscScalar dx=0.0,*y,*w3_array;
168: const PetscScalar *xx;
169: PetscScalar *vscale_array;
170: PetscReal epsilon=coloring->error_rel,umin=coloring->umin,unorm;
171: Vec w1=coloring->w1,w2=coloring->w2,w3,vscale=coloring->vscale;
172: void *fctx=coloring->fctx;
173: PetscInt ctype=coloring->ctype,nxloc,nrows_k;
174: MatEntry *Jentry=coloring->matentry;
175: MatEntry2 *Jentry2=coloring->matentry2;
176: const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows;
179: /* (1) Set w1 = F(x1) */
180: if (!coloring->fset) {
181: PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
182: (*f)(sctx,x1,w1,fctx);
183: PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
184: } else {
185: coloring->fset = PETSC_FALSE;
186: }
188: /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */
189: if (coloring->htype[0] == 'w') {
190: /* vscale = 1./dx is a constant scalar */
191: VecNorm(x1,NORM_2,&unorm);
192: dx = 1.0/(PetscSqrtReal(1.0 + unorm)*epsilon);
193: } else {
194: VecGetLocalSize(x1,&nxloc);
195: VecGetArrayRead(x1,&xx);
196: VecGetArray(vscale,&vscale_array);
197: for (col=0; col<nxloc; col++) {
198: dx = xx[col];
199: if (PetscAbsScalar(dx) < umin) {
200: if (PetscRealPart(dx) >= 0.0) dx = umin;
201: else if (PetscRealPart(dx) < 0.0 ) dx = -umin;
202: }
203: dx *= epsilon;
204: vscale_array[col] = 1.0/dx;
205: }
206: VecRestoreArrayRead(x1,&xx);
207: VecRestoreArray(vscale,&vscale_array);
208: }
209: if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') {
210: VecGhostUpdateBegin(vscale,INSERT_VALUES,SCATTER_FORWARD);
211: VecGhostUpdateEnd(vscale,INSERT_VALUES,SCATTER_FORWARD);
212: }
214: /* (3) Loop over each color */
215: if (!coloring->w3) {
216: VecDuplicate(x1,&coloring->w3);
217: PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);
218: }
219: w3 = coloring->w3;
221: VecGetOwnershipRange(x1,&cstart,&cend); /* used by ghosted vscale */
222: if (vscale) {
223: VecGetArray(vscale,&vscale_array);
224: }
225: nz = 0;
227: if (coloring->bcols > 1) { /* use blocked insertion of Jentry */
228: PetscInt i,m=J->rmap->n,nbcols,bcols=coloring->bcols;
229: PetscScalar *dy=coloring->dy,*dy_k;
231: nbcols = 0;
232: for (k=0; k<ncolors; k+=bcols) {
233: coloring->currentcolor = k;
235: /*
236: (3-1) Loop over each column associated with color
237: adding the perturbation to the vector w3 = x1 + dx.
238: */
240: dy_k = dy;
241: if (k + bcols > ncolors) bcols = ncolors - k;
242: for (i=0; i<bcols; i++) {
244: VecCopy(x1,w3);
245: VecGetArray(w3,&w3_array);
246: if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
247: if (coloring->htype[0] == 'w') {
248: for (l=0; l<ncolumns[k+i]; l++) {
249: col = coloring->columns[k+i][l]; /* local column (in global index!) of the matrix we are probing for */
250: w3_array[col] += 1.0/dx;
251: }
252: } else { /* htype == 'ds' */
253: vscale_array -= cstart; /* shift pointer so global index can be used */
254: for (l=0; l<ncolumns[k+i]; l++) {
255: col = coloring->columns[k+i][l]; /* local column (in global index!) of the matrix we are probing for */
256: w3_array[col] += 1.0/vscale_array[col];
257: }
258: vscale_array += cstart;
259: }
260: if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
261: VecRestoreArray(w3,&w3_array);
263: /*
264: (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
265: w2 = F(x1 + dx) - F(x1)
266: */
267: PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
268: VecPlaceArray(w2,dy_k); /* place w2 to the array dy_i */
269: (*f)(sctx,w3,w2,fctx);
270: PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
271: VecAXPY(w2,-1.0,w1);
272: VecResetArray(w2);
273: dy_k += m; /* points to dy+i*nxloc */
274: }
276: /*
277: (3-3) Loop over block rows of vector, putting results into Jacobian matrix
278: */
279: nrows_k = nrows[nbcols++];
280: VecGetArray(w2,&y);
282: if (coloring->htype[0] == 'w') {
283: for (l=0; l<nrows_k; l++) {
284: row = Jentry2[nz].row; /* local row index */
285: *(Jentry2[nz++].valaddr) = dy[row]*dx;
286: }
287: } else { /* htype == 'ds' */
288: for (l=0; l<nrows_k; l++) {
289: row = Jentry[nz].row; /* local row index */
290: *(Jentry[nz].valaddr) = dy[row]*vscale_array[Jentry[nz].col];
291: nz++;
292: }
293: }
294: VecRestoreArray(w2,&y);
295: }
296: } else { /* bcols == 1 */
297: for (k=0; k<ncolors; k++) {
298: coloring->currentcolor = k;
300: /*
301: (3-1) Loop over each column associated with color
302: adding the perturbation to the vector w3 = x1 + dx.
303: */
304: VecCopy(x1,w3);
305: VecGetArray(w3,&w3_array);
306: if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
307: if (coloring->htype[0] == 'w') {
308: for (l=0; l<ncolumns[k]; l++) {
309: col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
310: w3_array[col] += 1.0/dx;
311: }
312: } else { /* htype == 'ds' */
313: vscale_array -= cstart; /* shift pointer so global index can be used */
314: for (l=0; l<ncolumns[k]; l++) {
315: col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
316: w3_array[col] += 1.0/vscale_array[col];
317: }
318: vscale_array += cstart;
319: }
320: if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
321: VecRestoreArray(w3,&w3_array);
323: /*
324: (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
325: w2 = F(x1 + dx) - F(x1)
326: */
327: PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
328: (*f)(sctx,w3,w2,fctx);
329: PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
330: VecAXPY(w2,-1.0,w1);
332: /*
333: (3-3) Loop over rows of vector, putting results into Jacobian matrix
334: */
335: nrows_k = nrows[k];
336: VecGetArray(w2,&y);
337: if (coloring->htype[0] == 'w') {
338: for (l=0; l<nrows_k; l++) {
339: row = Jentry2[nz].row; /* local row index */
340: *(Jentry2[nz++].valaddr) = y[row]*dx;
341: }
342: } else { /* htype == 'ds' */
343: for (l=0; l<nrows_k; l++) {
344: row = Jentry[nz].row; /* local row index */
345: *(Jentry[nz].valaddr) = y[row]*vscale_array[Jentry[nz].col];
346: nz++;
347: }
348: }
349: VecRestoreArray(w2,&y);
350: }
351: }
353: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
354: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
355: if (vscale) {
356: VecRestoreArray(vscale,&vscale_array);
357: }
358: coloring->currentcolor = -1;
359: return(0);
360: }
364: PetscErrorCode MatFDColoringSetUp_MPIXAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
365: {
366: PetscErrorCode ierr;
367: PetscMPIInt size,*ncolsonproc,*disp,nn;
368: PetscInt i,n,nrows,nrows_i,j,k,m,ncols,col,*rowhit,cstart,cend,colb;
369: const PetscInt *is,*A_ci,*A_cj,*B_ci,*B_cj,*row=NULL,*ltog=NULL;
370: PetscInt nis=iscoloring->n,nctot,*cols;
371: IS *isa;
372: ISLocalToGlobalMapping map=mat->cmap->mapping;
373: PetscInt ctype=c->ctype,*spidxA,*spidxB,nz,bs,bs2,spidx;
374: Mat A,B;
375: PetscScalar *A_val,*B_val,**valaddrhit;
376: MatEntry *Jentry;
377: MatEntry2 *Jentry2;
378: PetscBool isBAIJ;
379: PetscInt bcols=c->bcols;
380: #if defined(PETSC_USE_CTABLE)
381: PetscTable colmap=NULL;
382: #else
383: PetscInt *colmap=NULL; /* local col number of off-diag col */
384: #endif
387: if (ctype == IS_COLORING_GHOSTED) {
388: if (!map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping");
389: ISLocalToGlobalMappingGetIndices(map,<og);
390: }
392: MatGetBlockSize(mat,&bs);
393: PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&isBAIJ);
394: if (isBAIJ) {
395: Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
396: Mat_SeqBAIJ *spA,*spB;
397: A = baij->A; spA = (Mat_SeqBAIJ*)A->data; A_val = spA->a;
398: B = baij->B; spB = (Mat_SeqBAIJ*)B->data; B_val = spB->a;
399: nz = spA->nz + spB->nz; /* total nonzero entries of mat */
400: if (!baij->colmap) {
401: MatCreateColmap_MPIBAIJ_Private(mat);
402: colmap = baij->colmap;
403: }
404: MatGetColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);
405: MatGetColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);
407: if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
408: PetscInt *garray;
409: PetscMalloc1(B->cmap->n,&garray);
410: for (i=0; i<baij->B->cmap->n/bs; i++) {
411: for (j=0; j<bs; j++) {
412: garray[i*bs+j] = bs*baij->garray[i]+j;
413: }
414: }
415: VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->cmap->n,PETSC_DETERMINE,B->cmap->n,garray,&c->vscale);
416: PetscFree(garray);
417: }
418: } else {
419: Mat_MPIAIJ *aij=(Mat_MPIAIJ*)mat->data;
420: Mat_SeqAIJ *spA,*spB;
421: A = aij->A; spA = (Mat_SeqAIJ*)A->data; A_val = spA->a;
422: B = aij->B; spB = (Mat_SeqAIJ*)B->data; B_val = spB->a;
423: nz = spA->nz + spB->nz; /* total nonzero entries of mat */
424: if (!aij->colmap) {
425: /* Allow access to data structures of local part of matrix
426: - creates aij->colmap which maps global column number to local number in part B */
427: MatCreateColmap_MPIAIJ_Private(mat);
428: colmap = aij->colmap;
429: }
430: MatGetColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);
431: MatGetColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);
433: bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */
435: if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
436: VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->cmap->n,PETSC_DETERMINE,B->cmap->n,aij->garray,&c->vscale);
437: }
438: }
440: m = mat->rmap->n/bs;
441: cstart = mat->cmap->rstart/bs;
442: cend = mat->cmap->rend/bs;
444: PetscMalloc1(nis,&c->ncolumns);
445: PetscMalloc1(nis,&c->columns);
446: PetscMalloc1(nis,&c->nrows);
447: PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));
449: if (c->htype[0] == 'd') {
450: PetscMalloc1(nz,&Jentry);
451: PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry));
452: c->matentry = Jentry;
453: } else if (c->htype[0] == 'w') {
454: PetscMalloc1(nz,&Jentry2);
455: PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry2));
456: c->matentry2 = Jentry2;
457: } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"htype is not supported");
459: PetscMalloc2(m+1,&rowhit,m+1,&valaddrhit);
460: nz = 0;
461: ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);
462: for (i=0; i<nis; i++) { /* for each local color */
463: ISGetLocalSize(isa[i],&n);
464: ISGetIndices(isa[i],&is);
466: c->ncolumns[i] = n; /* local number of columns of this color on this process */
467: if (n) {
468: PetscMalloc1(n,&c->columns[i]);
469: PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));
470: PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));
471: } else {
472: c->columns[i] = 0;
473: }
475: if (ctype == IS_COLORING_GLOBAL) {
476: /* Determine nctot, the total (parallel) number of columns of this color */
477: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
478: PetscMalloc2(size,&ncolsonproc,size,&disp);
480: /* ncolsonproc[j]: local ncolumns on proc[j] of this color */
481: PetscMPIIntCast(n,&nn);
482: MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));
483: nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
484: if (!nctot) {
485: PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");
486: }
488: disp[0] = 0;
489: for (j=1; j<size; j++) {
490: disp[j] = disp[j-1] + ncolsonproc[j-1];
491: }
493: /* Get cols, the complete list of columns for this color on each process */
494: PetscMalloc1(nctot+1,&cols);
495: MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));
496: PetscFree2(ncolsonproc,disp);
497: } else if (ctype == IS_COLORING_GHOSTED) {
498: /* Determine local number of columns of this color on this process, including ghost points */
499: nctot = n;
500: PetscMalloc1(nctot+1,&cols);
501: PetscMemcpy(cols,is,n*sizeof(PetscInt));
502: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
504: /* Mark all rows affect by these columns */
505: PetscMemzero(rowhit,m*sizeof(PetscInt));
506: bs2 = bs*bs;
507: nrows_i = 0;
508: for (j=0; j<nctot; j++) { /* loop over columns*/
509: if (ctype == IS_COLORING_GHOSTED) {
510: col = ltog[cols[j]];
511: } else {
512: col = cols[j];
513: }
514: if (col >= cstart && col < cend) { /* column is in A, diagonal block of mat */
515: row = A_cj + A_ci[col-cstart];
516: nrows = A_ci[col-cstart+1] - A_ci[col-cstart];
517: nrows_i += nrows;
518: /* loop over columns of A marking them in rowhit */
519: for (k=0; k<nrows; k++) {
520: /* set valaddrhit for part A */
521: spidx = bs2*spidxA[A_ci[col-cstart] + k];
522: valaddrhit[*row] = &A_val[spidx];
523: rowhit[*row++] = col - cstart + 1; /* local column index */
524: }
525: } else { /* column is in B, off-diagonal block of mat */
526: #if defined(PETSC_USE_CTABLE)
527: PetscTableFind(colmap,col+1,&colb);
528: colb--;
529: #else
530: colb = colmap[col] - 1; /* local column index */
531: #endif
532: if (colb == -1) {
533: nrows = 0;
534: } else {
535: colb = colb/bs;
536: row = B_cj + B_ci[colb];
537: nrows = B_ci[colb+1] - B_ci[colb];
538: }
539: nrows_i += nrows;
540: /* loop over columns of B marking them in rowhit */
541: for (k=0; k<nrows; k++) {
542: /* set valaddrhit for part B */
543: spidx = bs2*spidxB[B_ci[colb] + k];
544: valaddrhit[*row] = &B_val[spidx];
545: rowhit[*row++] = colb + 1 + cend - cstart; /* local column index */
546: }
547: }
548: }
549: c->nrows[i] = nrows_i;
551: if (c->htype[0] == 'd') {
552: for (j=0; j<m; j++) {
553: if (rowhit[j]) {
554: Jentry[nz].row = j; /* local row index */
555: Jentry[nz].col = rowhit[j] - 1; /* local column index */
556: Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
557: nz++;
558: }
559: }
560: } else { /* c->htype == 'wp' */
561: for (j=0; j<m; j++) {
562: if (rowhit[j]) {
563: Jentry2[nz].row = j; /* local row index */
564: Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
565: nz++;
566: }
567: }
568: }
569: PetscFree(cols);
570: }
572: if (bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */
573: MatFDColoringSetUpBlocked_AIJ_Private(mat,c,nz);
574: }
576: if (isBAIJ) {
577: MatRestoreColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);
578: MatRestoreColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);
579: PetscMalloc1(bs*mat->rmap->n,&c->dy);
580: } else {
581: MatRestoreColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);
582: MatRestoreColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);
583: }
585: ISColoringRestoreIS(iscoloring,&isa);
586: PetscFree2(rowhit,valaddrhit);
588: if (ctype == IS_COLORING_GHOSTED) {
589: ISLocalToGlobalMappingRestoreIndices(map,<og);
590: }
591: PetscInfo3(c,"ncolors %D, brows %D and bcols %D are used.\n",c->ncolors,c->brows,c->bcols);
592: return(0);
593: }
597: PetscErrorCode MatFDColoringCreate_MPIXAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
598: {
600: PetscInt bs,nis=iscoloring->n,m=mat->rmap->n;
601: PetscBool isBAIJ;
604: /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian;
605: bcols is chosen s.t. dy-array takes 50% of memory space as mat */
606: MatGetBlockSize(mat,&bs);
607: PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&isBAIJ);
608: if (isBAIJ || m == 0) {
609: c->brows = m;
610: c->bcols = 1;
611: } else { /* mpiaij matrix */
612: /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */
613: Mat_MPIAIJ *aij=(Mat_MPIAIJ*)mat->data;
614: Mat_SeqAIJ *spA,*spB;
615: Mat A,B;
616: PetscInt nz,brows,bcols;
617: PetscReal mem;
619: bs = 1; /* only bs=1 is supported for MPIAIJ matrix */
621: A = aij->A; spA = (Mat_SeqAIJ*)A->data;
622: B = aij->B; spB = (Mat_SeqAIJ*)B->data;
623: nz = spA->nz + spB->nz; /* total local nonzero entries of mat */
624: mem = nz*(sizeof(PetscScalar) + sizeof(PetscInt)) + 3*m*sizeof(PetscInt);
625: bcols = (PetscInt)(0.5*mem /(m*sizeof(PetscScalar)));
626: brows = 1000/bcols;
627: if (bcols > nis) bcols = nis;
628: if (brows == 0 || brows > m) brows = m;
629: c->brows = brows;
630: c->bcols = bcols;
631: }
633: c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */
634: c->N = mat->cmap->N/bs;
635: c->m = mat->rmap->n/bs;
636: c->rstart = mat->rmap->rstart/bs;
637: c->ncolors = nis;
638: return(0);
639: }