2: /*
3: Defines basic operations for the MATSEQAIJPERM matrix class.
4: This class is derived from the MATSEQAIJ class and retains the
5: compressed row storage (aka Yale sparse matrix format) but augments
6: it with some permutation information that enables some operations
7: to be more vectorizable. A physically rearranged copy of the matrix
8: may be stored if the user desires.
10: Eventually a variety of permutations may be supported.
11: */
13: #include <../src/mat/impls/aij/seq/aij.h> 15: #define NDIM 512 16: /* NDIM specifies how many rows at a time we should work with when
17: * performing the vectorized mat-vec. This depends on various factors
18: * such as vector register length, etc., and I really need to add a
19: * way for the user (or the library) to tune this. I'm setting it to
20: * 512 for now since that is what Ed D'Azevedo was using in his Fortran
21: * routines. */
23: typedef struct {
24: PetscObjectState nonzerostate; /* used to determine if the nonzero structure has changed and hence the permutations need updating */
26: PetscInt ngroup;
27: PetscInt *xgroup;
28: /* Denotes where groups of rows with same number of nonzeros
29: * begin and end, i.e., xgroup[i] gives us the position in iperm[]
30: * where the ith group begins. */
32: PetscInt *nzgroup; /* how many nonzeros each row that is a member of group i has. */
33: PetscInt *iperm; /* The permutation vector. */
35: /* Some of this stuff is for Ed's recursive triangular solve.
36: * I'm not sure what I need yet. */
37: PetscInt blocksize;
38: PetscInt nstep;
39: PetscInt *jstart_list;
40: PetscInt *jend_list;
41: PetscInt *action_list;
42: PetscInt *ngroup_list;
43: PetscInt **ipointer_list;
44: PetscInt **xgroup_list;
45: PetscInt **nzgroup_list;
46: PetscInt **iperm_list;
47: } Mat_SeqAIJPERM;
49: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJPERM_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 50: {
51: /* This routine is only called to convert a MATAIJPERM to its base PETSc type, */
52: /* so we will ignore 'MatType type'. */
54: Mat B = *newmat;
55: Mat_SeqAIJPERM *aijperm=(Mat_SeqAIJPERM*)A->spptr;
58: if (reuse == MAT_INITIAL_MATRIX) {
59: MatDuplicate(A,MAT_COPY_VALUES,&B);
60: aijperm=(Mat_SeqAIJPERM*)B->spptr;
61: }
63: /* Reset the original function pointers. */
64: B->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
65: B->ops->destroy = MatDestroy_SeqAIJ;
66: B->ops->duplicate = MatDuplicate_SeqAIJ;
67: B->ops->mult = MatMult_SeqAIJ;
68: B->ops->multadd = MatMultAdd_SeqAIJ;
70: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijperm_seqaij_C",NULL);
71: PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijperm_C",NULL);
72: PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijperm_C",NULL);
73: PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijperm_C",NULL);
75: /* Free everything in the Mat_SeqAIJPERM data structure.*/
76: PetscFree(aijperm->xgroup);
77: PetscFree(aijperm->nzgroup);
78: PetscFree(aijperm->iperm);
79: PetscFree(B->spptr);
81: /* Change the type of B to MATSEQAIJ. */
82: PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);
84: *newmat = B;
85: return(0);
86: }
88: PetscErrorCode MatDestroy_SeqAIJPERM(Mat A) 89: {
91: Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM*) A->spptr;
94: if (aijperm) {
95: /* If MatHeaderMerge() was used then this SeqAIJPERM matrix will not have a spprt. */
96: PetscFree(aijperm->xgroup);
97: PetscFree(aijperm->nzgroup);
98: PetscFree(aijperm->iperm);
99: PetscFree(A->spptr);
100: }
101: /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
102: * to destroy everything that remains. */
103: PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
104: /* Note that I don't call MatSetType(). I believe this is because that
105: * is only to be called when *building* a matrix. I could be wrong, but
106: * that is how things work for the SuperLU matrix class. */
107: MatDestroy_SeqAIJ(A);
108: return(0);
109: }
111: PetscErrorCode MatDuplicate_SeqAIJPERM(Mat A, MatDuplicateOption op, Mat *M)112: {
114: Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM*) A->spptr;
115: Mat_SeqAIJPERM *aijperm_dest;
116: PetscBool perm;
119: MatDuplicate_SeqAIJ(A,op,M);
120: PetscObjectTypeCompare((PetscObject)*M,MATSEQAIJPERM,&perm);
121: if (perm) {
122: aijperm_dest = (Mat_SeqAIJPERM *) (*M)->spptr;
123: PetscFree(aijperm_dest->xgroup);
124: PetscFree(aijperm_dest->nzgroup);
125: PetscFree(aijperm_dest->iperm);
126: } else {
127: PetscNewLog(*M,&aijperm_dest);
128: (*M)->spptr = (void*) aijperm_dest;
129: PetscObjectChangeTypeName((PetscObject)*M,MATSEQAIJPERM);
130: PetscObjectComposeFunction((PetscObject)*M,"MatConvert_seqaijperm_seqaij_C",MatConvert_SeqAIJPERM_SeqAIJ);
131: PetscObjectComposeFunction((PetscObject)*M,"MatMatMult_seqdense_seqaijperm_C",MatMatMult_SeqDense_SeqAIJ);
132: PetscObjectComposeFunction((PetscObject)*M,"MatMatMultSymbolic_seqdense_seqaijperm_C",MatMatMultSymbolic_SeqDense_SeqAIJ);
133: PetscObjectComposeFunction((PetscObject)*M,"MatMatMultNumeric_seqdense_seqaijperm_C",MatMatMultNumeric_SeqDense_SeqAIJ);
134: }
135: PetscMemcpy(aijperm_dest,aijperm,sizeof(Mat_SeqAIJPERM));
136: /* Allocate space for, and copy the grouping and permutation info.
137: * I note that when the groups are initially determined in
138: * MatSeqAIJPERM_create_perm, xgroup and nzgroup may be sized larger than
139: * necessary. But at this point, we know how large they need to be, and
140: * allocate only the necessary amount of memory. So the duplicated matrix
141: * may actually use slightly less storage than the original! */
142: PetscMalloc1(A->rmap->n, &aijperm_dest->iperm);
143: PetscMalloc1(aijperm->ngroup+1, &aijperm_dest->xgroup);
144: PetscMalloc1(aijperm->ngroup, &aijperm_dest->nzgroup);
145: PetscMemcpy(aijperm_dest->iperm,aijperm->iperm,sizeof(PetscInt)*A->rmap->n);
146: PetscMemcpy(aijperm_dest->xgroup,aijperm->xgroup,sizeof(PetscInt)*(aijperm->ngroup+1));
147: PetscMemcpy(aijperm_dest->nzgroup,aijperm->nzgroup,sizeof(PetscInt)*aijperm->ngroup);
148: return(0);
149: }
151: PetscErrorCode MatSeqAIJPERM_create_perm(Mat A)152: {
154: Mat_SeqAIJ *a = (Mat_SeqAIJ*)(A)->data;
155: Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM*) A->spptr;
156: PetscInt m; /* Number of rows in the matrix. */
157: PetscInt *ia; /* From the CSR representation; points to the beginning of each row. */
158: PetscInt maxnz; /* Maximum number of nonzeros in any row. */
159: PetscInt *rows_in_bucket;
160: /* To construct the permutation, we sort each row into one of maxnz
161: * buckets based on how many nonzeros are in the row. */
162: PetscInt nz;
163: PetscInt *nz_in_row; /* the number of nonzero elements in row k. */
164: PetscInt *ipnz;
165: /* When constructing the iperm permutation vector,
166: * ipnz[nz] is used to point to the next place in the permutation vector
167: * that a row with nz nonzero elements should be placed.*/
168: PetscInt i, ngroup, istart, ipos;
171: if (aijperm->nonzerostate == A->nonzerostate) return(0); /* permutation exists and matches current nonzero structure */
172: aijperm->nonzerostate = A->nonzerostate;
173: /* Free anything previously put in the Mat_SeqAIJPERM data structure. */
174: PetscFree(aijperm->xgroup);
175: PetscFree(aijperm->nzgroup);
176: PetscFree(aijperm->iperm);
178: m = A->rmap->n;
179: ia = a->i;
181: /* Allocate the arrays that will hold the permutation vector. */
182: PetscMalloc1(m, &aijperm->iperm);
184: /* Allocate some temporary work arrays that will be used in
185: * calculating the permuation vector and groupings. */
186: PetscMalloc1(m, &nz_in_row);
188: /* Now actually figure out the permutation and grouping. */
190: /* First pass: Determine number of nonzeros in each row, maximum
191: * number of nonzeros in any row, and how many rows fall into each
192: * "bucket" of rows with same number of nonzeros. */
193: maxnz = 0;
194: for (i=0; i<m; i++) {
195: nz_in_row[i] = ia[i+1]-ia[i];
196: if (nz_in_row[i] > maxnz) maxnz = nz_in_row[i];
197: }
198: PetscMalloc1(PetscMax(maxnz,m)+1, &rows_in_bucket);
199: PetscMalloc1(PetscMax(maxnz,m)+1, &ipnz);
201: for (i=0; i<=maxnz; i++) {
202: rows_in_bucket[i] = 0;
203: }
204: for (i=0; i<m; i++) {
205: nz = nz_in_row[i];
206: rows_in_bucket[nz]++;
207: }
209: /* Allocate space for the grouping info. There will be at most (maxnz + 1)
210: * groups. (It is maxnz + 1 instead of simply maxnz because there may be
211: * rows with no nonzero elements.) If there are (maxnz + 1) groups,
212: * then xgroup[] must consist of (maxnz + 2) elements, since the last
213: * element of xgroup will tell us where the (maxnz + 1)th group ends.
214: * We allocate space for the maximum number of groups;
215: * that is potentially a little wasteful, but not too much so.
216: * Perhaps I should fix it later. */
217: PetscMalloc1(maxnz+2, &aijperm->xgroup);
218: PetscMalloc1(maxnz+1, &aijperm->nzgroup);
220: /* Second pass. Look at what is in the buckets and create the groupings.
221: * Note that it is OK to have a group of rows with no non-zero values. */
222: ngroup = 0;
223: istart = 0;
224: for (i=0; i<=maxnz; i++) {
225: if (rows_in_bucket[i] > 0) {
226: aijperm->nzgroup[ngroup] = i;
227: aijperm->xgroup[ngroup] = istart;
228: ngroup++;
229: istart += rows_in_bucket[i];
230: }
231: }
233: aijperm->xgroup[ngroup] = istart;
234: aijperm->ngroup = ngroup;
236: /* Now fill in the permutation vector iperm. */
237: ipnz[0] = 0;
238: for (i=0; i<maxnz; i++) {
239: ipnz[i+1] = ipnz[i] + rows_in_bucket[i];
240: }
242: for (i=0; i<m; i++) {
243: nz = nz_in_row[i];
244: ipos = ipnz[nz];
245: aijperm->iperm[ipos] = i;
246: ipnz[nz]++;
247: }
249: /* Clean up temporary work arrays. */
250: PetscFree(rows_in_bucket);
251: PetscFree(ipnz);
252: PetscFree(nz_in_row);
253: return(0);
254: }
257: PetscErrorCode MatAssemblyEnd_SeqAIJPERM(Mat A, MatAssemblyType mode)258: {
260: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
263: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
265: /* Since a MATSEQAIJPERM matrix is really just a MATSEQAIJ with some
266: * extra information, call the AssemblyEnd routine for a MATSEQAIJ.
267: * I'm not sure if this is the best way to do this, but it avoids
268: * a lot of code duplication.
269: * I also note that currently MATSEQAIJPERM doesn't know anything about
270: * the Mat_CompressedRow data structure that SeqAIJ now uses when there
271: * are many zero rows. If the SeqAIJ assembly end routine decides to use
272: * this, this may break things. (Don't know... haven't looked at it.) */
273: a->inode.use = PETSC_FALSE;
274: MatAssemblyEnd_SeqAIJ(A, mode);
276: /* Now calculate the permutation and grouping information. */
277: MatSeqAIJPERM_create_perm(A);
278: return(0);
279: }
281: PetscErrorCode MatMult_SeqAIJPERM(Mat A,Vec xx,Vec yy)282: {
283: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
284: const PetscScalar *x;
285: PetscScalar *y;
286: const MatScalar *aa;
287: PetscErrorCode ierr;
288: const PetscInt *aj,*ai;
289: #if !(defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJPERM) && defined(notworking))
290: PetscInt i,j;
291: #endif
293: /* Variables that don't appear in MatMult_SeqAIJ. */
294: Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM*) A->spptr;
295: PetscInt *iperm; /* Points to the permutation vector. */
296: PetscInt *xgroup;
297: /* Denotes where groups of rows with same number of nonzeros
298: * begin and end in iperm. */
299: PetscInt *nzgroup;
300: PetscInt ngroup;
301: PetscInt igroup;
302: PetscInt jstart,jend;
303: /* jstart is used in loops to denote the position in iperm where a
304: * group starts; jend denotes the position where it ends.
305: * (jend + 1 is where the next group starts.) */
306: PetscInt iold,nz;
307: PetscInt istart,iend,isize;
308: PetscInt ipos;
309: PetscScalar yp[NDIM];
310: PetscInt ip[NDIM]; /* yp[] and ip[] are treated as vector "registers" for performing the mat-vec. */
312: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
313: #pragma disjoint(*x,*y,*aa)
314: #endif
317: VecGetArrayRead(xx,&x);
318: VecGetArray(yy,&y);
319: aj = a->j; /* aj[k] gives column index for element aa[k]. */
320: aa = a->a; /* Nonzero elements stored row-by-row. */
321: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
323: /* Get the info we need about the permutations and groupings. */
324: iperm = aijperm->iperm;
325: ngroup = aijperm->ngroup;
326: xgroup = aijperm->xgroup;
327: nzgroup = aijperm->nzgroup;
329: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJPERM) && defined(notworking)
330: fortranmultaijperm_(&m,x,ii,aj,aa,y);
331: #else
333: for (igroup=0; igroup<ngroup; igroup++) {
334: jstart = xgroup[igroup];
335: jend = xgroup[igroup+1] - 1;
336: nz = nzgroup[igroup];
338: /* Handle the special cases where the number of nonzeros per row
339: * in the group is either 0 or 1. */
340: if (nz == 0) {
341: for (i=jstart; i<=jend; i++) {
342: y[iperm[i]] = 0.0;
343: }
344: } else if (nz == 1) {
345: for (i=jstart; i<=jend; i++) {
346: iold = iperm[i];
347: ipos = ai[iold];
348: y[iold] = aa[ipos] * x[aj[ipos]];
349: }
350: } else {
352: /* We work our way through the current group in chunks of NDIM rows
353: * at a time. */
355: for (istart=jstart; istart<=jend; istart+=NDIM) {
356: /* Figure out where the chunk of 'isize' rows ends in iperm.
357: * 'isize may of course be less than NDIM for the last chunk. */
358: iend = istart + (NDIM - 1);
360: if (iend > jend) iend = jend;
362: isize = iend - istart + 1;
364: /* Initialize the yp[] array that will be used to hold part of
365: * the permuted results vector, and figure out where in aa each
366: * row of the chunk will begin. */
367: for (i=0; i<isize; i++) {
368: iold = iperm[istart + i];
369: /* iold is a row number from the matrix A *before* reordering. */
370: ip[i] = ai[iold];
371: /* ip[i] tells us where the ith row of the chunk begins in aa. */
372: yp[i] = (PetscScalar) 0.0;
373: }
375: /* If the number of zeros per row exceeds the number of rows in
376: * the chunk, we should vectorize along nz, that is, perform the
377: * mat-vec one row at a time as in the usual CSR case. */
378: if (nz > isize) {
379: #if defined(PETSC_HAVE_CRAY_VECTOR)
380: #pragma _CRI preferstream
381: #endif
382: for (i=0; i<isize; i++) {
383: #if defined(PETSC_HAVE_CRAY_VECTOR)
384: #pragma _CRI prefervector
385: #endif
386: for (j=0; j<nz; j++) {
387: ipos = ip[i] + j;
388: yp[i] += aa[ipos] * x[aj[ipos]];
389: }
390: }
391: } else {
392: /* Otherwise, there are enough rows in the chunk to make it
393: * worthwhile to vectorize across the rows, that is, to do the
394: * matvec by operating with "columns" of the chunk. */
395: for (j=0; j<nz; j++) {
396: for (i=0; i<isize; i++) {
397: ipos = ip[i] + j;
398: yp[i] += aa[ipos] * x[aj[ipos]];
399: }
400: }
401: }
403: #if defined(PETSC_HAVE_CRAY_VECTOR)
404: #pragma _CRI ivdep
405: #endif
406: /* Put results from yp[] into non-permuted result vector y. */
407: for (i=0; i<isize; i++) {
408: y[iperm[istart+i]] = yp[i];
409: }
410: } /* End processing chunk of isize rows of a group. */
411: } /* End handling matvec for chunk with nz > 1. */
412: } /* End loop over igroup. */
413: #endif
414: PetscLogFlops(PetscMax(2.0*a->nz - A->rmap->n,0));
415: VecRestoreArrayRead(xx,&x);
416: VecRestoreArray(yy,&y);
417: return(0);
418: }
421: /* MatMultAdd_SeqAIJPERM() calculates yy = ww + A * xx.
422: * Note that the names I used to designate the vectors differs from that
423: * used in MatMultAdd_SeqAIJ(). I did this to keep my notation consistent
424: * with the MatMult_SeqAIJPERM() routine, which is very similar to this one. */
425: /*
426: I hate having virtually identical code for the mult and the multadd!!!
427: */
428: PetscErrorCode MatMultAdd_SeqAIJPERM(Mat A,Vec xx,Vec ww,Vec yy)429: {
430: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
431: const PetscScalar *x;
432: PetscScalar *y,*w;
433: const MatScalar *aa;
434: PetscErrorCode ierr;
435: const PetscInt *aj,*ai;
436: #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTADDAIJPERM)
437: PetscInt i,j;
438: #endif
440: /* Variables that don't appear in MatMultAdd_SeqAIJ. */
441: Mat_SeqAIJPERM * aijperm;
442: PetscInt *iperm; /* Points to the permutation vector. */
443: PetscInt *xgroup;
444: /* Denotes where groups of rows with same number of nonzeros
445: * begin and end in iperm. */
446: PetscInt *nzgroup;
447: PetscInt ngroup;
448: PetscInt igroup;
449: PetscInt jstart,jend;
450: /* jstart is used in loops to denote the position in iperm where a
451: * group starts; jend denotes the position where it ends.
452: * (jend + 1 is where the next group starts.) */
453: PetscInt iold,nz;
454: PetscInt istart,iend,isize;
455: PetscInt ipos;
456: PetscScalar yp[NDIM];
457: PetscInt ip[NDIM];
458: /* yp[] and ip[] are treated as vector "registers" for performing
459: * the mat-vec. */
461: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
462: #pragma disjoint(*x,*y,*aa)
463: #endif
466: VecGetArrayRead(xx,&x);
467: VecGetArrayPair(yy,ww,&y,&w);
469: aj = a->j; /* aj[k] gives column index for element aa[k]. */
470: aa = a->a; /* Nonzero elements stored row-by-row. */
471: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
473: /* Get the info we need about the permutations and groupings. */
474: aijperm = (Mat_SeqAIJPERM*) A->spptr;
475: iperm = aijperm->iperm;
476: ngroup = aijperm->ngroup;
477: xgroup = aijperm->xgroup;
478: nzgroup = aijperm->nzgroup;
480: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTADDAIJPERM)
481: fortranmultaddaijperm_(&m,x,ii,aj,aa,y,w);
482: #else
484: for (igroup=0; igroup<ngroup; igroup++) {
485: jstart = xgroup[igroup];
486: jend = xgroup[igroup+1] - 1;
488: nz = nzgroup[igroup];
490: /* Handle the special cases where the number of nonzeros per row
491: * in the group is either 0 or 1. */
492: if (nz == 0) {
493: for (i=jstart; i<=jend; i++) {
494: iold = iperm[i];
495: y[iold] = w[iold];
496: }
497: }
498: else if (nz == 1) {
499: for (i=jstart; i<=jend; i++) {
500: iold = iperm[i];
501: ipos = ai[iold];
502: y[iold] = w[iold] + aa[ipos] * x[aj[ipos]];
503: }
504: }
505: /* For the general case: */
506: else {
508: /* We work our way through the current group in chunks of NDIM rows
509: * at a time. */
511: for (istart=jstart; istart<=jend; istart+=NDIM) {
512: /* Figure out where the chunk of 'isize' rows ends in iperm.
513: * 'isize may of course be less than NDIM for the last chunk. */
514: iend = istart + (NDIM - 1);
515: if (iend > jend) iend = jend;
516: isize = iend - istart + 1;
518: /* Initialize the yp[] array that will be used to hold part of
519: * the permuted results vector, and figure out where in aa each
520: * row of the chunk will begin. */
521: for (i=0; i<isize; i++) {
522: iold = iperm[istart + i];
523: /* iold is a row number from the matrix A *before* reordering. */
524: ip[i] = ai[iold];
525: /* ip[i] tells us where the ith row of the chunk begins in aa. */
526: yp[i] = w[iold];
527: }
529: /* If the number of zeros per row exceeds the number of rows in
530: * the chunk, we should vectorize along nz, that is, perform the
531: * mat-vec one row at a time as in the usual CSR case. */
532: if (nz > isize) {
533: #if defined(PETSC_HAVE_CRAY_VECTOR)
534: #pragma _CRI preferstream
535: #endif
536: for (i=0; i<isize; i++) {
537: #if defined(PETSC_HAVE_CRAY_VECTOR)
538: #pragma _CRI prefervector
539: #endif
540: for (j=0; j<nz; j++) {
541: ipos = ip[i] + j;
542: yp[i] += aa[ipos] * x[aj[ipos]];
543: }
544: }
545: }
546: /* Otherwise, there are enough rows in the chunk to make it
547: * worthwhile to vectorize across the rows, that is, to do the
548: * matvec by operating with "columns" of the chunk. */
549: else {
550: for (j=0; j<nz; j++) {
551: for (i=0; i<isize; i++) {
552: ipos = ip[i] + j;
553: yp[i] += aa[ipos] * x[aj[ipos]];
554: }
555: }
556: }
558: #if defined(PETSC_HAVE_CRAY_VECTOR)
559: #pragma _CRI ivdep
560: #endif
561: /* Put results from yp[] into non-permuted result vector y. */
562: for (i=0; i<isize; i++) {
563: y[iperm[istart+i]] = yp[i];
564: }
565: } /* End processing chunk of isize rows of a group. */
567: } /* End handling matvec for chunk with nz > 1. */
568: } /* End loop over igroup. */
570: #endif
571: PetscLogFlops(2.0*a->nz);
572: VecRestoreArrayRead(xx,&x);
573: VecRestoreArrayPair(yy,ww,&y,&w);
574: return(0);
575: }
578: /* MatConvert_SeqAIJ_SeqAIJPERM converts a SeqAIJ matrix into a
579: * SeqAIJPERM matrix. This routine is called by the MatCreate_SeqAIJPERM()
580: * routine, but can also be used to convert an assembled SeqAIJ matrix
581: * into a SeqAIJPERM one. */
582: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJPERM(Mat A,MatType type,MatReuse reuse,Mat *newmat)583: {
585: Mat B = *newmat;
586: Mat_SeqAIJPERM *aijperm;
587: PetscBool sametype;
590: if (reuse == MAT_INITIAL_MATRIX) {
591: MatDuplicate(A,MAT_COPY_VALUES,&B);
592: }
593: PetscObjectTypeCompare((PetscObject)A,type,&sametype);
594: if (sametype) return(0);
596: PetscNewLog(B,&aijperm);
597: B->spptr = (void*) aijperm;
599: /* Set function pointers for methods that we inherit from AIJ but override. */
600: B->ops->duplicate = MatDuplicate_SeqAIJPERM;
601: B->ops->assemblyend = MatAssemblyEnd_SeqAIJPERM;
602: B->ops->destroy = MatDestroy_SeqAIJPERM;
603: B->ops->mult = MatMult_SeqAIJPERM;
604: B->ops->multadd = MatMultAdd_SeqAIJPERM;
606: aijperm->nonzerostate = -1; /* this will trigger the generation of the permutation information the first time through MatAssembly()*/
607: /* If A has already been assembled, compute the permutation. */
608: if (A->assembled) {
609: MatSeqAIJPERM_create_perm(B);
610: }
612: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijperm_seqaij_C",MatConvert_SeqAIJPERM_SeqAIJ);
613: PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijperm_C",MatMatMult_SeqDense_SeqAIJ);
614: PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijperm_C",MatMatMultSymbolic_SeqDense_SeqAIJ);
615: PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijperm_C",MatMatMultNumeric_SeqDense_SeqAIJ);
617: PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJPERM);
618: *newmat = B;
619: return(0);
620: }
622: /*@C
623: MatCreateSeqAIJPERM - Creates a sparse matrix of type SEQAIJPERM.
624: This type inherits from AIJ, but calculates some additional permutation
625: information that is used to allow better vectorization of some
626: operations. At the cost of increased storage, the AIJ formatted
627: matrix can be copied to a format in which pieces of the matrix are
628: stored in ELLPACK format, allowing the vectorized matrix multiply
629: routine to use stride-1 memory accesses. As with the AIJ type, it is
630: important to preallocate matrix storage in order to get good assembly
631: performance.
633: Collective on MPI_Comm635: Input Parameters:
636: + comm - MPI communicator, set to PETSC_COMM_SELF637: . m - number of rows
638: . n - number of columns
639: . nz - number of nonzeros per row (same for all rows)
640: - nnz - array containing the number of nonzeros in the various rows
641: (possibly different for each row) or NULL
643: Output Parameter:
644: . A - the matrix
646: Notes:
647: If nnz is given then nz is ignored
649: Level: intermediate
651: .keywords: matrix, cray, sparse, parallel
653: .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues()
654: @*/
655: PetscErrorCodeMatCreateSeqAIJPERM(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)656: {
660: MatCreate(comm,A);
661: MatSetSizes(*A,m,n,m,n);
662: MatSetType(*A,MATSEQAIJPERM);
663: MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
664: return(0);
665: }
667: PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJPERM(Mat A)668: {
672: MatSetType(A,MATSEQAIJ);
673: MatConvert_SeqAIJ_SeqAIJPERM(A,MATSEQAIJPERM,MAT_INPLACE_MATRIX,&A);
674: return(0);
675: }