2: /*
3: Defines basic operations for the MATSEQAIJPERM matrix class.
4: This class is derived from the MATSEQAIJ class and retains the
5: compressed row storage (aka Yale sparse matrix format) but augments
6: it with some permutation information that enables some operations
7: to be more vectorizable. A physically rearranged copy of the matrix
8: may be stored if the user desires.
10: Eventually a variety of permutations may be supported.
11: */
13: #include <../src/mat/impls/aij/seq/aij.h>
15: #define NDIM 512 16: /* NDIM specifies how many rows at a time we should work with when
17: * performing the vectorized mat-vec. This depends on various factors
18: * such as vector register length, etc., and I really need to add a
19: * way for the user (or the library) to tune this. I'm setting it to
20: * 512 for now since that is what Ed D'Azevedo was using in his Fortran
21: * routines. */
23: typedef struct {
24: PetscInt ngroup;
25: PetscInt *xgroup;
26: /* Denotes where groups of rows with same number of nonzeros
27: * begin and end, i.e., xgroup[i] gives us the position in iperm[]
28: * where the ith group begins. */
29: PetscInt *nzgroup; /* how many nonzeros each row that is a member of group i has. */
30: PetscInt *iperm; /* The permutation vector. */
32: /* Flag that indicates whether we need to clean up permutation
33: * information during the MatDestroy. */
34: PetscBool CleanUpAIJPERM;
36: /* Some of this stuff is for Ed's recursive triangular solve.
37: * I'm not sure what I need yet. */
38: PetscInt blocksize;
39: PetscInt nstep;
40: PetscInt *jstart_list;
41: PetscInt *jend_list;
42: PetscInt *action_list;
43: PetscInt *ngroup_list;
44: PetscInt **ipointer_list;
45: PetscInt **xgroup_list;
46: PetscInt **nzgroup_list;
47: PetscInt **iperm_list;
48: } Mat_SeqAIJPERM;
50: extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
52: EXTERN_C_BEGIN
55: PetscErrorCode MatConvert_SeqAIJPERM_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 56: {
57: /* This routine is only called to convert a MATAIJPERM to its base PETSc type, */
58: /* so we will ignore 'MatType type'. */
60: Mat B = *newmat;
61: Mat_SeqAIJPERM *aijperm=(Mat_SeqAIJPERM*)A->spptr;
64: if (reuse == MAT_INITIAL_MATRIX) {
65: MatDuplicate(A,MAT_COPY_VALUES,&B);
66: }
68: /* Reset the original function pointers. */
69: B->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
70: B->ops->destroy = MatDestroy_SeqAIJ;
71: B->ops->duplicate = MatDuplicate_SeqAIJ;
73: /* Free everything in the Mat_SeqAIJPERM data structure.
74: * We don't free the Mat_SeqAIJPERM struct itself, as this will
75: * cause problems later when MatDestroy() tries to free it. */
76: if(aijperm->CleanUpAIJPERM) {
77: PetscFree(aijperm->xgroup);
78: PetscFree(aijperm->nzgroup);
79: PetscFree(aijperm->iperm);
80: }
82: /* Change the type of B to MATSEQAIJ. */
83: PetscObjectChangeTypeName( (PetscObject)B, MATSEQAIJ);
84: 85: *newmat = B;
86: return(0);
87: }
88: EXTERN_C_END
92: PetscErrorCode MatDestroy_SeqAIJPERM(Mat A) 93: {
95: Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM *) A->spptr;
98: /* Free everything in the Mat_SeqAIJPERM data structure.
99: * Note that we don't need to free the Mat_SeqAIJPERM struct
100: * itself, as MatDestroy() will do so. */
101: if(aijperm && aijperm->CleanUpAIJPERM) {
102: PetscFree(aijperm->xgroup);
103: PetscFree(aijperm->nzgroup);
104: PetscFree(aijperm->iperm);
105: }
106: PetscFree(A->spptr);
108: /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
109: * to destroy everything that remains. */
110: PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ);
111: /* Note that I don't call MatSetType(). I believe this is because that
112: * is only to be called when *building* a matrix. I could be wrong, but
113: * that is how things work for the SuperLU matrix class. */
114: MatDestroy_SeqAIJ(A);
115: return(0);
116: }
118: PetscErrorCode MatDuplicate_SeqAIJPERM(Mat A, MatDuplicateOption op, Mat *M)119: {
121: Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM *) A->spptr;
122: Mat_SeqAIJPERM *aijperm_dest = (Mat_SeqAIJPERM *) (*M)->spptr;
125: MatDuplicate_SeqAIJ(A,op,M);
126: PetscMemcpy((*M)->spptr,aijperm,sizeof(Mat_SeqAIJPERM));
127: /* Allocate space for, and copy the grouping and permutation info.
128: * I note that when the groups are initially determined in
129: * MatSeqAIJPERM_create_perm, xgroup and nzgroup may be sized larger than
130: * necessary. But at this point, we know how large they need to be, and
131: * allocate only the necessary amount of memory. So the duplicated matrix
132: * may actually use slightly less storage than the original! */
133: PetscMalloc(A->rmap->n*sizeof(PetscInt), aijperm_dest->iperm);
134: PetscMalloc((aijperm->ngroup+1)*sizeof(PetscInt), aijperm_dest->xgroup);
135: PetscMalloc((aijperm->ngroup)*sizeof(PetscInt), aijperm_dest->nzgroup);
136: PetscMemcpy(aijperm_dest->iperm,aijperm->iperm,sizeof(PetscInt)*A->rmap->n);
137: PetscMemcpy(aijperm_dest->xgroup,aijperm->xgroup,sizeof(PetscInt)*(aijperm->ngroup+1));
138: PetscMemcpy(aijperm_dest->nzgroup,aijperm->nzgroup,sizeof(PetscInt)*aijperm->ngroup);
139: return(0);
140: }
144: PetscErrorCode MatSeqAIJPERM_create_perm(Mat A)145: {
146: PetscInt m; /* Number of rows in the matrix. */
147: Mat_SeqAIJ *a = (Mat_SeqAIJ *)(A)->data;
148: PetscInt *ia; /* From the CSR representation; points to the beginning of each row. */
149: PetscInt maxnz; /* Maximum number of nonzeros in any row. */
150: PetscInt *rows_in_bucket;
151: /* To construct the permutation, we sort each row into one of maxnz
152: * buckets based on how many nonzeros are in the row. */
153: PetscInt nz;
154: PetscInt *nz_in_row; /* the number of nonzero elements in row k. */
155: PetscInt *ipnz;
156: /* When constructing the iperm permutation vector,
157: * ipnz[nz] is used to point to the next place in the permutation vector
158: * that a row with nz nonzero elements should be placed.*/
159: Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM*) A->spptr;
160: /* Points to the MATSEQAIJPERM-specific data in the matrix B. */
162: PetscInt i, ngroup, istart, ipos;
164: /* I really ought to put something in here to check if B is of
165: * type MATSEQAIJPERM and return an error code if it is not.
166: * Come back and do this! */
168: m = A->rmap->n;
169: ia = a->i;
170: 171: /* Allocate the arrays that will hold the permutation vector. */
172: PetscMalloc(m*sizeof(PetscInt), &aijperm->iperm);
174: /* Allocate some temporary work arrays that will be used in
175: * calculating the permuation vector and groupings. */
176: PetscMalloc((m+1)*sizeof(PetscInt), &rows_in_bucket);
177: PetscMalloc((m+1)*sizeof(PetscInt), &ipnz);
178: PetscMalloc(m*sizeof(PetscInt), &nz_in_row);
180: /* Now actually figure out the permutation and grouping. */
182: /* First pass: Determine number of nonzeros in each row, maximum
183: * number of nonzeros in any row, and how many rows fall into each
184: * "bucket" of rows with same number of nonzeros. */
185: maxnz = 0;
186: for (i=0; i<m; i++) {
187: nz_in_row[i] = ia[i+1]-ia[i];
188: if (nz_in_row[i] > maxnz) maxnz = nz_in_row[i];
189: }
191: for (i=0; i<=maxnz; i++) {
192: rows_in_bucket[i] = 0;
193: }
194: for (i=0; i<m; i++) {
195: nz = nz_in_row[i];
196: rows_in_bucket[nz]++;
197: }
199: /* Allocate space for the grouping info. There will be at most (maxnz + 1)
200: * groups. (It is maxnz + 1 instead of simply maxnz because there may be
201: * rows with no nonzero elements.) If there are (maxnz + 1) groups,
202: * then xgroup[] must consist of (maxnz + 2) elements, since the last
203: * element of xgroup will tell us where the (maxnz + 1)th group ends.
204: * We allocate space for the maximum number of groups;
205: * that is potentially a little wasteful, but not too much so.
206: * Perhaps I should fix it later. */
207: PetscMalloc((maxnz+2)*sizeof(PetscInt), &aijperm->xgroup);
208: PetscMalloc((maxnz+1)*sizeof(PetscInt), &aijperm->nzgroup);
210: /* Second pass. Look at what is in the buckets and create the groupings.
211: * Note that it is OK to have a group of rows with no non-zero values. */
212: ngroup = 0;
213: istart = 0;
214: for (i=0; i<=maxnz; i++) {
215: if (rows_in_bucket[i] > 0) {
216: aijperm->nzgroup[ngroup] = i;
217: aijperm->xgroup[ngroup] = istart;
218: ngroup++;
219: istart += rows_in_bucket[i];
220: }
221: }
223: aijperm->xgroup[ngroup] = istart;
224: aijperm->ngroup = ngroup;
226: /* Now fill in the permutation vector iperm. */
227: ipnz[0] = 0;
228: for (i=0; i<maxnz; i++) {
229: ipnz[i+1] = ipnz[i] + rows_in_bucket[i];
230: }
232: for (i=0; i<m; i++) {
233: nz = nz_in_row[i];
234: ipos = ipnz[nz];
235: aijperm->iperm[ipos] = i;
236: ipnz[nz]++;
237: }
239: /* Clean up temporary work arrays. */
240: PetscFree(rows_in_bucket);
241: PetscFree(ipnz);
242: PetscFree(nz_in_row);
244: /* Since we've allocated some memory to hold permutation info,
245: * flip the CleanUpAIJPERM flag to true. */
246: aijperm->CleanUpAIJPERM = PETSC_TRUE;
247: return(0);
248: }
253: PetscErrorCode MatAssemblyEnd_SeqAIJPERM(Mat A, MatAssemblyType mode)254: {
256: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
259: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
260: 261: /* Since a MATSEQAIJPERM matrix is really just a MATSEQAIJ with some
262: * extra information, call the AssemblyEnd routine for a MATSEQAIJ.
263: * I'm not sure if this is the best way to do this, but it avoids
264: * a lot of code duplication.
265: * I also note that currently MATSEQAIJPERM doesn't know anything about
266: * the Mat_CompressedRow data structure that SeqAIJ now uses when there
267: * are many zero rows. If the SeqAIJ assembly end routine decides to use
268: * this, this may break things. (Don't know... haven't looked at it.) */
269: a->inode.use = PETSC_FALSE;
270: MatAssemblyEnd_SeqAIJ(A, mode);
272: /* Now calculate the permutation and grouping information. */
273: MatSeqAIJPERM_create_perm(A);
274: return(0);
275: }
279: PetscErrorCode MatMult_SeqAIJPERM(Mat A,Vec xx,Vec yy)280: {
281: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
282: const PetscScalar *x;
283: PetscScalar *y;
284: const MatScalar *aa;
285: PetscErrorCode ierr;
286: const PetscInt *aj,*ai;
287: #if !(defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJPERM) && defined(notworking))
288: PetscInt i,j;
289: #endif
291: /* Variables that don't appear in MatMult_SeqAIJ. */
292: Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM *) A->spptr;
293: PetscInt *iperm; /* Points to the permutation vector. */
294: PetscInt *xgroup;
295: /* Denotes where groups of rows with same number of nonzeros
296: * begin and end in iperm. */
297: PetscInt *nzgroup;
298: PetscInt ngroup;
299: PetscInt igroup;
300: PetscInt jstart,jend;
301: /* jstart is used in loops to denote the position in iperm where a
302: * group starts; jend denotes the position where it ends.
303: * (jend + 1 is where the next group starts.) */
304: PetscInt iold,nz;
305: PetscInt istart,iend,isize;
306: PetscInt ipos;
307: PetscScalar yp[NDIM];
308: PetscInt ip[NDIM]; /* yp[] and ip[] are treated as vector "registers" for performing the mat-vec. */
310: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
311: #pragma disjoint(*x,*y,*aa)
312: #endif
315: VecGetArrayRead(xx,&x);
316: VecGetArray(yy,&y);
317: aj = a->j; /* aj[k] gives column index for element aa[k]. */
318: aa = a->a; /* Nonzero elements stored row-by-row. */
319: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
321: /* Get the info we need about the permutations and groupings. */
322: iperm = aijperm->iperm;
323: ngroup = aijperm->ngroup;
324: xgroup = aijperm->xgroup;
325: nzgroup = aijperm->nzgroup;
326: 327: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJPERM) && defined(notworking)
328: fortranmultaijperm_(&m,x,ii,aj,aa,y);
329: #else
331: for (igroup=0; igroup<ngroup; igroup++) {
332: jstart = xgroup[igroup];
333: jend = xgroup[igroup+1] - 1;
334: nz = nzgroup[igroup];
336: /* Handle the special cases where the number of nonzeros per row
337: * in the group is either 0 or 1. */
338: if (nz == 0) {
339: for (i=jstart; i<=jend; i++) {
340: y[iperm[i]] = 0.0;
341: }
342: } else if (nz == 1) {
343: for (i=jstart; i<=jend; i++) {
344: iold = iperm[i];
345: ipos = ai[iold];
346: y[iold] = aa[ipos] * x[aj[ipos]];
347: }
348: } else {
349: 350: /* We work our way through the current group in chunks of NDIM rows
351: * at a time. */
353: for (istart=jstart; istart<=jend; istart+=NDIM) {
354: /* Figure out where the chunk of 'isize' rows ends in iperm.
355: * 'isize may of course be less than NDIM for the last chunk. */
356: iend = istart + (NDIM - 1);
357: if (iend > jend) { iend = jend; }
358: isize = iend - istart + 1;
360: /* Initialize the yp[] array that will be used to hold part of
361: * the permuted results vector, and figure out where in aa each
362: * row of the chunk will begin. */
363: for (i=0; i<isize; i++) {
364: iold = iperm[istart + i];
365: /* iold is a row number from the matrix A *before* reordering. */
366: ip[i] = ai[iold];
367: /* ip[i] tells us where the ith row of the chunk begins in aa. */
368: yp[i] = (PetscScalar) 0.0;
369: }
371: /* If the number of zeros per row exceeds the number of rows in
372: * the chunk, we should vectorize along nz, that is, perform the
373: * mat-vec one row at a time as in the usual CSR case. */
374: if (nz > isize) {
375: #if defined(PETSC_HAVE_CRAY_VECTOR)
376: #pragma _CRI preferstream
377: #endif
378: for (i=0; i<isize; i++) {
379: #if defined(PETSC_HAVE_CRAY_VECTOR)
380: #pragma _CRI prefervector
381: #endif
382: for (j=0; j<nz; j++) {
383: ipos = ip[i] + j;
384: yp[i] += aa[ipos] * x[aj[ipos]];
385: }
386: }
387: } else {
388: /* Otherwise, there are enough rows in the chunk to make it
389: * worthwhile to vectorize across the rows, that is, to do the
390: * matvec by operating with "columns" of the chunk. */
391: for (j=0; j<nz; j++) {
392: for(i=0; i<isize; i++) {
393: ipos = ip[i] + j;
394: yp[i] += aa[ipos] * x[aj[ipos]];
395: }
396: }
397: }
399: #if defined(PETSC_HAVE_CRAY_VECTOR)
400: #pragma _CRI ivdep
401: #endif
402: /* Put results from yp[] into non-permuted result vector y. */
403: for (i=0; i<isize; i++) {
404: y[iperm[istart+i]] = yp[i];
405: }
406: } /* End processing chunk of isize rows of a group. */
407: } /* End handling matvec for chunk with nz > 1. */
408: } /* End loop over igroup. */
409: #endif
410: PetscLogFlops(2.0*a->nz - A->rmap->n);
411: VecRestoreArrayRead(xx,&x);
412: VecRestoreArray(yy,&y);
413: return(0);
414: }
417: /* MatMultAdd_SeqAIJPERM() calculates yy = ww + A * xx.
418: * Note that the names I used to designate the vectors differs from that
419: * used in MatMultAdd_SeqAIJ(). I did this to keep my notation consistent
420: * with the MatMult_SeqAIJPERM() routine, which is very similar to this one. */
421: /*
422: I hate having virtually identical code for the mult and the multadd!!!
423: */
426: PetscErrorCode MatMultAdd_SeqAIJPERM(Mat A,Vec xx,Vec ww,Vec yy)427: {
428: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
429: const PetscScalar *x;
430: PetscScalar *y,*w;
431: const MatScalar *aa;
432: PetscErrorCode ierr;
433: const PetscInt *aj,*ai;
434: #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTADDAIJPERM)
435: PetscInt i,j;
436: #endif
438: /* Variables that don't appear in MatMultAdd_SeqAIJ. */
439: Mat_SeqAIJPERM * aijperm;
440: PetscInt *iperm; /* Points to the permutation vector. */
441: PetscInt *xgroup;
442: /* Denotes where groups of rows with same number of nonzeros
443: * begin and end in iperm. */
444: PetscInt *nzgroup;
445: PetscInt ngroup;
446: PetscInt igroup;
447: PetscInt jstart,jend;
448: /* jstart is used in loops to denote the position in iperm where a
449: * group starts; jend denotes the position where it ends.
450: * (jend + 1 is where the next group starts.) */
451: PetscInt iold,nz;
452: PetscInt istart,iend,isize;
453: PetscInt ipos;
454: PetscScalar yp[NDIM];
455: PetscInt ip[NDIM];
456: /* yp[] and ip[] are treated as vector "registers" for performing
457: * the mat-vec. */
459: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
460: #pragma disjoint(*x,*y,*aa)
461: #endif
464: VecGetArrayRead(xx,&x);
465: VecGetArray(yy,&y);
466: if (yy != ww) {
467: VecGetArray(ww,&w);
468: } else {
469: w = y;
470: }
472: aj = a->j; /* aj[k] gives column index for element aa[k]. */
473: aa = a->a; /* Nonzero elements stored row-by-row. */
474: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
476: /* Get the info we need about the permutations and groupings. */
477: aijperm = (Mat_SeqAIJPERM *) A->spptr;
478: iperm = aijperm->iperm;
479: ngroup = aijperm->ngroup;
480: xgroup = aijperm->xgroup;
481: nzgroup = aijperm->nzgroup;
482: 483: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTADDAIJPERM)
484: fortranmultaddaijperm_(&m,x,ii,aj,aa,y,w);
485: #else
487: for(igroup=0; igroup<ngroup; igroup++) {
488: jstart = xgroup[igroup];
489: jend = xgroup[igroup+1] - 1;
491: nz = nzgroup[igroup];
493: /* Handle the special cases where the number of nonzeros per row
494: * in the group is either 0 or 1. */
495: if(nz == 0) {
496: for(i=jstart; i<=jend; i++) {
497: iold = iperm[i];
498: y[iold] = w[iold];
499: }
500: }
501: else if(nz == 1) {
502: for(i=jstart; i<=jend; i++) {
503: iold = iperm[i];
504: ipos = ai[iold];
505: y[iold] = w[iold] + aa[ipos] * x[aj[ipos]];
506: }
507: }
508: /* For the general case: */
509: else {
510: 511: /* We work our way through the current group in chunks of NDIM rows
512: * at a time. */
514: for(istart=jstart; istart<=jend; istart+=NDIM) {
515: /* Figure out where the chunk of 'isize' rows ends in iperm.
516: * 'isize may of course be less than NDIM for the last chunk. */
517: iend = istart + (NDIM - 1);
518: if(iend > jend) { iend = jend; }
519: isize = iend - istart + 1;
521: /* Initialize the yp[] array that will be used to hold part of
522: * the permuted results vector, and figure out where in aa each
523: * row of the chunk will begin. */
524: for(i=0; i<isize; i++) {
525: iold = iperm[istart + i];
526: /* iold is a row number from the matrix A *before* reordering. */
527: ip[i] = ai[iold];
528: /* ip[i] tells us where the ith row of the chunk begins in aa. */
529: yp[i] = w[iold];
530: }
532: /* If the number of zeros per row exceeds the number of rows in
533: * the chunk, we should vectorize along nz, that is, perform the
534: * mat-vec one row at a time as in the usual CSR case. */
535: if(nz > isize) {
536: #if defined(PETSC_HAVE_CRAY_VECTOR)
537: #pragma _CRI preferstream
538: #endif
539: for(i=0; i<isize; i++) {
540: #if defined(PETSC_HAVE_CRAY_VECTOR)
541: #pragma _CRI prefervector
542: #endif
543: for(j=0; j<nz; j++) {
544: ipos = ip[i] + j;
545: yp[i] += aa[ipos] * x[aj[ipos]];
546: }
547: }
548: }
549: /* Otherwise, there are enough rows in the chunk to make it
550: * worthwhile to vectorize across the rows, that is, to do the
551: * matvec by operating with "columns" of the chunk. */
552: else {
553: for(j=0; j<nz; j++) {
554: for(i=0; i<isize; i++) {
555: ipos = ip[i] + j;
556: yp[i] += aa[ipos] * x[aj[ipos]];
557: }
558: }
559: }
561: #if defined(PETSC_HAVE_CRAY_VECTOR)
562: #pragma _CRI ivdep
563: #endif
564: /* Put results from yp[] into non-permuted result vector y. */
565: for(i=0; i<isize; i++) {
566: y[iperm[istart+i]] = yp[i];
567: }
568: } /* End processing chunk of isize rows of a group. */
569: 570: } /* End handling matvec for chunk with nz > 1. */
571: } /* End loop over igroup. */
573: #endif
574: PetscLogFlops(2.0*a->nz);
575: VecRestoreArrayRead(xx,&x);
576: VecRestoreArray(yy,&y);
577: if (yy != ww) {
578: VecRestoreArray(ww,&w);
579: }
580: return(0);
581: }
584: /* MatConvert_SeqAIJ_SeqAIJPERM converts a SeqAIJ matrix into a
585: * SeqAIJPERM matrix. This routine is called by the MatCreate_SeqAIJPERM()
586: * routine, but can also be used to convert an assembled SeqAIJ matrix
587: * into a SeqAIJPERM one. */
588: EXTERN_C_BEGIN
591: PetscErrorCode MatConvert_SeqAIJ_SeqAIJPERM(Mat A,const MatType type,MatReuse reuse,Mat *newmat)592: {
594: Mat B = *newmat;
595: Mat_SeqAIJPERM *aijperm;
598: if (reuse == MAT_INITIAL_MATRIX) {
599: MatDuplicate(A,MAT_COPY_VALUES,&B);
600: }
602: PetscNewLog(B,Mat_SeqAIJPERM,&aijperm);
603: B->spptr = (void *) aijperm;
605: /* Set function pointers for methods that we inherit from AIJ but override. */
606: B->ops->duplicate = MatDuplicate_SeqAIJPERM;
607: B->ops->assemblyend = MatAssemblyEnd_SeqAIJPERM;
608: B->ops->destroy = MatDestroy_SeqAIJPERM;
609: B->ops->mult = MatMult_SeqAIJPERM;
610: B->ops->multadd = MatMultAdd_SeqAIJPERM;
612: /* If A has already been assembled, compute the permutation. */
613: if (A->assembled) {
614: MatSeqAIJPERM_create_perm(B);
615: }
616: 617: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaijperm_seqaij_C","MatConvert_SeqAIJPERM_SeqAIJ",MatConvert_SeqAIJPERM_SeqAIJ);
619: PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJPERM);
620: *newmat = B;
621: return(0);
622: }
623: EXTERN_C_END
628: /*@C
629: MatCreateSeqAIJPERM - Creates a sparse matrix of type SEQAIJPERM.
630: This type inherits from AIJ, but calculates some additional permutation
631: information that is used to allow better vectorization of some
632: operations. At the cost of increased storage, the AIJ formatted
633: matrix can be copied to a format in which pieces of the matrix are
634: stored in ELLPACK format, allowing the vectorized matrix multiply
635: routine to use stride-1 memory accesses. As with the AIJ type, it is
636: important to preallocate matrix storage in order to get good assembly
637: performance.
638: 639: Collective on MPI_Comm641: Input Parameters:
642: + comm - MPI communicator, set to PETSC_COMM_SELF643: . m - number of rows
644: . n - number of columns
645: . nz - number of nonzeros per row (same for all rows)
646: - nnz - array containing the number of nonzeros in the various rows
647: (possibly different for each row) or PETSC_NULL649: Output Parameter:
650: . A - the matrix
652: Notes:
653: If nnz is given then nz is ignored
655: Level: intermediate
657: .keywords: matrix, cray, sparse, parallel
659: .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues()
660: @*/
661: PetscErrorCodeMatCreateSeqAIJPERM(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)662: {
666: MatCreate(comm,A);
667: MatSetSizes(*A,m,n,m,n);
668: MatSetType(*A,MATSEQAIJPERM);
669: MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
670: return(0);
671: }
673: EXTERN_C_BEGIN
676: PetscErrorCode MatCreate_SeqAIJPERM(Mat A)677: {
681: MatSetType(A,MATSEQAIJ);
682: MatConvert_SeqAIJ_SeqAIJPERM(A,MATSEQAIJPERM,MAT_REUSE_MATRIX,&A);
683: return(0);
684: }
685: EXTERN_C_END