2: /*
3: Defines basic operations for the MATSEQAIJPERM matrix class.
4: This class is derived from the MATSEQAIJ class and retains the
5: compressed row storage (aka Yale sparse matrix format) but augments
6: it with some permutation information that enables some operations
7: to be more vectorizable. A physically rearranged copy of the matrix
8: may be stored if the user desires.
10: Eventually a variety of permutations may be supported.
11: */
13: #include <../src/mat/impls/aij/seq/aij.h>
15: #define NDIM 512 16: /* NDIM specifies how many rows at a time we should work with when
17: * performing the vectorized mat-vec. This depends on various factors
18: * such as vector register length, etc., and I really need to add a
19: * way for the user (or the library) to tune this. I'm setting it to
20: * 512 for now since that is what Ed D'Azevedo was using in his Fortran
21: * routines. */
23: typedef struct {
24: PetscInt ngroup;
25: PetscInt *xgroup;
26: /* Denotes where groups of rows with same number of nonzeros
27: * begin and end, i.e., xgroup[i] gives us the position in iperm[]
28: * where the ith group begins. */
29: PetscInt *nzgroup; /* how many nonzeros each row that is a member of group i has. */
30: PetscInt *iperm; /* The permutation vector. */
32: /* Flag that indicates whether we need to clean up permutation
33: * information during the MatDestroy. */
34: PetscBool CleanUpAIJPERM;
36: /* Some of this stuff is for Ed's recursive triangular solve.
37: * I'm not sure what I need yet. */
38: PetscInt blocksize;
39: PetscInt nstep;
40: PetscInt *jstart_list;
41: PetscInt *jend_list;
42: PetscInt *action_list;
43: PetscInt *ngroup_list;
44: PetscInt **ipointer_list;
45: PetscInt **xgroup_list;
46: PetscInt **nzgroup_list;
47: PetscInt **iperm_list;
48: } Mat_SeqAIJPERM;
50: extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
54: PETSC_EXTERN PetscErrorCode MatConvert_SeqAIJPERM_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 55: {
56: /* This routine is only called to convert a MATAIJPERM to its base PETSc type, */
57: /* so we will ignore 'MatType type'. */
59: Mat B = *newmat;
60: Mat_SeqAIJPERM *aijperm=(Mat_SeqAIJPERM*)A->spptr;
63: if (reuse == MAT_INITIAL_MATRIX) {
64: MatDuplicate(A,MAT_COPY_VALUES,&B);
65: }
67: /* Reset the original function pointers. */
68: B->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
69: B->ops->destroy = MatDestroy_SeqAIJ;
70: B->ops->duplicate = MatDuplicate_SeqAIJ;
72: /* Free everything in the Mat_SeqAIJPERM data structure.
73: * We don't free the Mat_SeqAIJPERM struct itself, as this will
74: * cause problems later when MatDestroy() tries to free it. */
75: if (aijperm->CleanUpAIJPERM) {
76: PetscFree(aijperm->xgroup);
77: PetscFree(aijperm->nzgroup);
78: PetscFree(aijperm->iperm);
79: }
81: /* Change the type of B to MATSEQAIJ. */
82: PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);
84: *newmat = B;
85: return(0);
86: }
90: PetscErrorCode MatDestroy_SeqAIJPERM(Mat A) 91: {
93: Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM*) A->spptr;
96: /* Free everything in the Mat_SeqAIJPERM data structure.
97: * Note that we don't need to free the Mat_SeqAIJPERM struct
98: * itself, as MatDestroy() will do so. */
99: if (aijperm && aijperm->CleanUpAIJPERM) {
100: PetscFree(aijperm->xgroup);
101: PetscFree(aijperm->nzgroup);
102: PetscFree(aijperm->iperm);
103: }
104: PetscFree(A->spptr);
106: /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
107: * to destroy everything that remains. */
108: PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
109: /* Note that I don't call MatSetType(). I believe this is because that
110: * is only to be called when *building* a matrix. I could be wrong, but
111: * that is how things work for the SuperLU matrix class. */
112: MatDestroy_SeqAIJ(A);
113: return(0);
114: }
116: PetscErrorCode MatDuplicate_SeqAIJPERM(Mat A, MatDuplicateOption op, Mat *M)117: {
119: Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM*) A->spptr;
120: Mat_SeqAIJPERM *aijperm_dest = (Mat_SeqAIJPERM*) (*M)->spptr;
123: MatDuplicate_SeqAIJ(A,op,M);
124: PetscMemcpy((*M)->spptr,aijperm,sizeof(Mat_SeqAIJPERM));
125: /* Allocate space for, and copy the grouping and permutation info.
126: * I note that when the groups are initially determined in
127: * MatSeqAIJPERM_create_perm, xgroup and nzgroup may be sized larger than
128: * necessary. But at this point, we know how large they need to be, and
129: * allocate only the necessary amount of memory. So the duplicated matrix
130: * may actually use slightly less storage than the original! */
131: PetscMalloc(A->rmap->n*sizeof(PetscInt), aijperm_dest->iperm);
132: PetscMalloc((aijperm->ngroup+1)*sizeof(PetscInt), aijperm_dest->xgroup);
133: PetscMalloc((aijperm->ngroup)*sizeof(PetscInt), aijperm_dest->nzgroup);
134: PetscMemcpy(aijperm_dest->iperm,aijperm->iperm,sizeof(PetscInt)*A->rmap->n);
135: PetscMemcpy(aijperm_dest->xgroup,aijperm->xgroup,sizeof(PetscInt)*(aijperm->ngroup+1));
136: PetscMemcpy(aijperm_dest->nzgroup,aijperm->nzgroup,sizeof(PetscInt)*aijperm->ngroup);
137: return(0);
138: }
142: PetscErrorCode MatSeqAIJPERM_create_perm(Mat A)143: {
144: PetscInt m; /* Number of rows in the matrix. */
145: Mat_SeqAIJ *a = (Mat_SeqAIJ*)(A)->data;
146: PetscInt *ia; /* From the CSR representation; points to the beginning of each row. */
147: PetscInt maxnz; /* Maximum number of nonzeros in any row. */
148: PetscInt *rows_in_bucket;
149: /* To construct the permutation, we sort each row into one of maxnz
150: * buckets based on how many nonzeros are in the row. */
151: PetscInt nz;
152: PetscInt *nz_in_row; /* the number of nonzero elements in row k. */
153: PetscInt *ipnz;
154: /* When constructing the iperm permutation vector,
155: * ipnz[nz] is used to point to the next place in the permutation vector
156: * that a row with nz nonzero elements should be placed.*/
157: Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM*) A->spptr;
158: /* Points to the MATSEQAIJPERM-specific data in the matrix B. */
160: PetscInt i, ngroup, istart, ipos;
162: /* I really ought to put something in here to check if B is of
163: * type MATSEQAIJPERM and return an error code if it is not.
164: * Come back and do this! */
166: m = A->rmap->n;
167: ia = a->i;
169: /* Allocate the arrays that will hold the permutation vector. */
170: PetscMalloc(m*sizeof(PetscInt), &aijperm->iperm);
172: /* Allocate some temporary work arrays that will be used in
173: * calculating the permuation vector and groupings. */
174: PetscMalloc((m+1)*sizeof(PetscInt), &rows_in_bucket);
175: PetscMalloc((m+1)*sizeof(PetscInt), &ipnz);
176: PetscMalloc(m*sizeof(PetscInt), &nz_in_row);
178: /* Now actually figure out the permutation and grouping. */
180: /* First pass: Determine number of nonzeros in each row, maximum
181: * number of nonzeros in any row, and how many rows fall into each
182: * "bucket" of rows with same number of nonzeros. */
183: maxnz = 0;
184: for (i=0; i<m; i++) {
185: nz_in_row[i] = ia[i+1]-ia[i];
186: if (nz_in_row[i] > maxnz) maxnz = nz_in_row[i];
187: }
189: for (i=0; i<=maxnz; i++) {
190: rows_in_bucket[i] = 0;
191: }
192: for (i=0; i<m; i++) {
193: nz = nz_in_row[i];
194: rows_in_bucket[nz]++;
195: }
197: /* Allocate space for the grouping info. There will be at most (maxnz + 1)
198: * groups. (It is maxnz + 1 instead of simply maxnz because there may be
199: * rows with no nonzero elements.) If there are (maxnz + 1) groups,
200: * then xgroup[] must consist of (maxnz + 2) elements, since the last
201: * element of xgroup will tell us where the (maxnz + 1)th group ends.
202: * We allocate space for the maximum number of groups;
203: * that is potentially a little wasteful, but not too much so.
204: * Perhaps I should fix it later. */
205: PetscMalloc((maxnz+2)*sizeof(PetscInt), &aijperm->xgroup);
206: PetscMalloc((maxnz+1)*sizeof(PetscInt), &aijperm->nzgroup);
208: /* Second pass. Look at what is in the buckets and create the groupings.
209: * Note that it is OK to have a group of rows with no non-zero values. */
210: ngroup = 0;
211: istart = 0;
212: for (i=0; i<=maxnz; i++) {
213: if (rows_in_bucket[i] > 0) {
214: aijperm->nzgroup[ngroup] = i;
215: aijperm->xgroup[ngroup] = istart;
216: ngroup++;
217: istart += rows_in_bucket[i];
218: }
219: }
221: aijperm->xgroup[ngroup] = istart;
222: aijperm->ngroup = ngroup;
224: /* Now fill in the permutation vector iperm. */
225: ipnz[0] = 0;
226: for (i=0; i<maxnz; i++) {
227: ipnz[i+1] = ipnz[i] + rows_in_bucket[i];
228: }
230: for (i=0; i<m; i++) {
231: nz = nz_in_row[i];
232: ipos = ipnz[nz];
233: aijperm->iperm[ipos] = i;
234: ipnz[nz]++;
235: }
237: /* Clean up temporary work arrays. */
238: PetscFree(rows_in_bucket);
239: PetscFree(ipnz);
240: PetscFree(nz_in_row);
242: /* Since we've allocated some memory to hold permutation info,
243: * flip the CleanUpAIJPERM flag to true. */
244: aijperm->CleanUpAIJPERM = PETSC_TRUE;
245: return(0);
246: }
251: PetscErrorCode MatAssemblyEnd_SeqAIJPERM(Mat A, MatAssemblyType mode)252: {
254: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
257: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
259: /* Since a MATSEQAIJPERM matrix is really just a MATSEQAIJ with some
260: * extra information, call the AssemblyEnd routine for a MATSEQAIJ.
261: * I'm not sure if this is the best way to do this, but it avoids
262: * a lot of code duplication.
263: * I also note that currently MATSEQAIJPERM doesn't know anything about
264: * the Mat_CompressedRow data structure that SeqAIJ now uses when there
265: * are many zero rows. If the SeqAIJ assembly end routine decides to use
266: * this, this may break things. (Don't know... haven't looked at it.) */
267: a->inode.use = PETSC_FALSE;
268: MatAssemblyEnd_SeqAIJ(A, mode);
270: /* Now calculate the permutation and grouping information. */
271: MatSeqAIJPERM_create_perm(A);
272: return(0);
273: }
277: PetscErrorCode MatMult_SeqAIJPERM(Mat A,Vec xx,Vec yy)278: {
279: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
280: const PetscScalar *x;
281: PetscScalar *y;
282: const MatScalar *aa;
283: PetscErrorCode ierr;
284: const PetscInt *aj,*ai;
285: #if !(defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJPERM) && defined(notworking))
286: PetscInt i,j;
287: #endif
289: /* Variables that don't appear in MatMult_SeqAIJ. */
290: Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM*) A->spptr;
291: PetscInt *iperm; /* Points to the permutation vector. */
292: PetscInt *xgroup;
293: /* Denotes where groups of rows with same number of nonzeros
294: * begin and end in iperm. */
295: PetscInt *nzgroup;
296: PetscInt ngroup;
297: PetscInt igroup;
298: PetscInt jstart,jend;
299: /* jstart is used in loops to denote the position in iperm where a
300: * group starts; jend denotes the position where it ends.
301: * (jend + 1 is where the next group starts.) */
302: PetscInt iold,nz;
303: PetscInt istart,iend,isize;
304: PetscInt ipos;
305: PetscScalar yp[NDIM];
306: PetscInt ip[NDIM]; /* yp[] and ip[] are treated as vector "registers" for performing the mat-vec. */
308: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
309: #pragma disjoint(*x,*y,*aa)
310: #endif
313: VecGetArrayRead(xx,&x);
314: VecGetArray(yy,&y);
315: aj = a->j; /* aj[k] gives column index for element aa[k]. */
316: aa = a->a; /* Nonzero elements stored row-by-row. */
317: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
319: /* Get the info we need about the permutations and groupings. */
320: iperm = aijperm->iperm;
321: ngroup = aijperm->ngroup;
322: xgroup = aijperm->xgroup;
323: nzgroup = aijperm->nzgroup;
325: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJPERM) && defined(notworking)
326: fortranmultaijperm_(&m,x,ii,aj,aa,y);
327: #else
329: for (igroup=0; igroup<ngroup; igroup++) {
330: jstart = xgroup[igroup];
331: jend = xgroup[igroup+1] - 1;
332: nz = nzgroup[igroup];
334: /* Handle the special cases where the number of nonzeros per row
335: * in the group is either 0 or 1. */
336: if (nz == 0) {
337: for (i=jstart; i<=jend; i++) {
338: y[iperm[i]] = 0.0;
339: }
340: } else if (nz == 1) {
341: for (i=jstart; i<=jend; i++) {
342: iold = iperm[i];
343: ipos = ai[iold];
344: y[iold] = aa[ipos] * x[aj[ipos]];
345: }
346: } else {
348: /* We work our way through the current group in chunks of NDIM rows
349: * at a time. */
351: for (istart=jstart; istart<=jend; istart+=NDIM) {
352: /* Figure out where the chunk of 'isize' rows ends in iperm.
353: * 'isize may of course be less than NDIM for the last chunk. */
354: iend = istart + (NDIM - 1);
356: if (iend > jend) iend = jend;
358: isize = iend - istart + 1;
360: /* Initialize the yp[] array that will be used to hold part of
361: * the permuted results vector, and figure out where in aa each
362: * row of the chunk will begin. */
363: for (i=0; i<isize; i++) {
364: iold = iperm[istart + i];
365: /* iold is a row number from the matrix A *before* reordering. */
366: ip[i] = ai[iold];
367: /* ip[i] tells us where the ith row of the chunk begins in aa. */
368: yp[i] = (PetscScalar) 0.0;
369: }
371: /* If the number of zeros per row exceeds the number of rows in
372: * the chunk, we should vectorize along nz, that is, perform the
373: * mat-vec one row at a time as in the usual CSR case. */
374: if (nz > isize) {
375: #if defined(PETSC_HAVE_CRAY_VECTOR)
376: #pragma _CRI preferstream
377: #endif
378: for (i=0; i<isize; i++) {
379: #if defined(PETSC_HAVE_CRAY_VECTOR)
380: #pragma _CRI prefervector
381: #endif
382: for (j=0; j<nz; j++) {
383: ipos = ip[i] + j;
384: yp[i] += aa[ipos] * x[aj[ipos]];
385: }
386: }
387: } else {
388: /* Otherwise, there are enough rows in the chunk to make it
389: * worthwhile to vectorize across the rows, that is, to do the
390: * matvec by operating with "columns" of the chunk. */
391: for (j=0; j<nz; j++) {
392: for (i=0; i<isize; i++) {
393: ipos = ip[i] + j;
394: yp[i] += aa[ipos] * x[aj[ipos]];
395: }
396: }
397: }
399: #if defined(PETSC_HAVE_CRAY_VECTOR)
400: #pragma _CRI ivdep
401: #endif
402: /* Put results from yp[] into non-permuted result vector y. */
403: for (i=0; i<isize; i++) {
404: y[iperm[istart+i]] = yp[i];
405: }
406: } /* End processing chunk of isize rows of a group. */
407: } /* End handling matvec for chunk with nz > 1. */
408: } /* End loop over igroup. */
409: #endif
410: PetscLogFlops(2.0*a->nz - A->rmap->n);
411: VecRestoreArrayRead(xx,&x);
412: VecRestoreArray(yy,&y);
413: return(0);
414: }
417: /* MatMultAdd_SeqAIJPERM() calculates yy = ww + A * xx.
418: * Note that the names I used to designate the vectors differs from that
419: * used in MatMultAdd_SeqAIJ(). I did this to keep my notation consistent
420: * with the MatMult_SeqAIJPERM() routine, which is very similar to this one. */
421: /*
422: I hate having virtually identical code for the mult and the multadd!!!
423: */
426: PetscErrorCode MatMultAdd_SeqAIJPERM(Mat A,Vec xx,Vec ww,Vec yy)427: {
428: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
429: const PetscScalar *x;
430: PetscScalar *y,*w;
431: const MatScalar *aa;
432: PetscErrorCode ierr;
433: const PetscInt *aj,*ai;
434: #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTADDAIJPERM)
435: PetscInt i,j;
436: #endif
438: /* Variables that don't appear in MatMultAdd_SeqAIJ. */
439: Mat_SeqAIJPERM * aijperm;
440: PetscInt *iperm; /* Points to the permutation vector. */
441: PetscInt *xgroup;
442: /* Denotes where groups of rows with same number of nonzeros
443: * begin and end in iperm. */
444: PetscInt *nzgroup;
445: PetscInt ngroup;
446: PetscInt igroup;
447: PetscInt jstart,jend;
448: /* jstart is used in loops to denote the position in iperm where a
449: * group starts; jend denotes the position where it ends.
450: * (jend + 1 is where the next group starts.) */
451: PetscInt iold,nz;
452: PetscInt istart,iend,isize;
453: PetscInt ipos;
454: PetscScalar yp[NDIM];
455: PetscInt ip[NDIM];
456: /* yp[] and ip[] are treated as vector "registers" for performing
457: * the mat-vec. */
459: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
460: #pragma disjoint(*x,*y,*aa)
461: #endif
464: VecGetArrayRead(xx,&x);
465: VecGetArray(yy,&y);
466: if (yy != ww) {
467: VecGetArray(ww,&w);
468: } else {
469: w = y;
470: }
472: aj = a->j; /* aj[k] gives column index for element aa[k]. */
473: aa = a->a; /* Nonzero elements stored row-by-row. */
474: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
476: /* Get the info we need about the permutations and groupings. */
477: aijperm = (Mat_SeqAIJPERM*) A->spptr;
478: iperm = aijperm->iperm;
479: ngroup = aijperm->ngroup;
480: xgroup = aijperm->xgroup;
481: nzgroup = aijperm->nzgroup;
483: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTADDAIJPERM)
484: fortranmultaddaijperm_(&m,x,ii,aj,aa,y,w);
485: #else
487: for (igroup=0; igroup<ngroup; igroup++) {
488: jstart = xgroup[igroup];
489: jend = xgroup[igroup+1] - 1;
491: nz = nzgroup[igroup];
493: /* Handle the special cases where the number of nonzeros per row
494: * in the group is either 0 or 1. */
495: if (nz == 0) {
496: for (i=jstart; i<=jend; i++) {
497: iold = iperm[i];
498: y[iold] = w[iold];
499: }
500: }
501: else if (nz == 1) {
502: for (i=jstart; i<=jend; i++) {
503: iold = iperm[i];
504: ipos = ai[iold];
505: y[iold] = w[iold] + aa[ipos] * x[aj[ipos]];
506: }
507: }
508: /* For the general case: */
509: else {
511: /* We work our way through the current group in chunks of NDIM rows
512: * at a time. */
514: for (istart=jstart; istart<=jend; istart+=NDIM) {
515: /* Figure out where the chunk of 'isize' rows ends in iperm.
516: * 'isize may of course be less than NDIM for the last chunk. */
517: iend = istart + (NDIM - 1);
518: if (iend > jend) iend = jend;
519: isize = iend - istart + 1;
521: /* Initialize the yp[] array that will be used to hold part of
522: * the permuted results vector, and figure out where in aa each
523: * row of the chunk will begin. */
524: for (i=0; i<isize; i++) {
525: iold = iperm[istart + i];
526: /* iold is a row number from the matrix A *before* reordering. */
527: ip[i] = ai[iold];
528: /* ip[i] tells us where the ith row of the chunk begins in aa. */
529: yp[i] = w[iold];
530: }
532: /* If the number of zeros per row exceeds the number of rows in
533: * the chunk, we should vectorize along nz, that is, perform the
534: * mat-vec one row at a time as in the usual CSR case. */
535: if (nz > isize) {
536: #if defined(PETSC_HAVE_CRAY_VECTOR)
537: #pragma _CRI preferstream
538: #endif
539: for (i=0; i<isize; i++) {
540: #if defined(PETSC_HAVE_CRAY_VECTOR)
541: #pragma _CRI prefervector
542: #endif
543: for (j=0; j<nz; j++) {
544: ipos = ip[i] + j;
545: yp[i] += aa[ipos] * x[aj[ipos]];
546: }
547: }
548: }
549: /* Otherwise, there are enough rows in the chunk to make it
550: * worthwhile to vectorize across the rows, that is, to do the
551: * matvec by operating with "columns" of the chunk. */
552: else {
553: for (j=0; j<nz; j++) {
554: for (i=0; i<isize; i++) {
555: ipos = ip[i] + j;
556: yp[i] += aa[ipos] * x[aj[ipos]];
557: }
558: }
559: }
561: #if defined(PETSC_HAVE_CRAY_VECTOR)
562: #pragma _CRI ivdep
563: #endif
564: /* Put results from yp[] into non-permuted result vector y. */
565: for (i=0; i<isize; i++) {
566: y[iperm[istart+i]] = yp[i];
567: }
568: } /* End processing chunk of isize rows of a group. */
570: } /* End handling matvec for chunk with nz > 1. */
571: } /* End loop over igroup. */
573: #endif
574: PetscLogFlops(2.0*a->nz);
575: VecRestoreArrayRead(xx,&x);
576: VecRestoreArray(yy,&y);
577: if (yy != ww) {
578: VecRestoreArray(ww,&w);
579: }
580: return(0);
581: }
584: /* MatConvert_SeqAIJ_SeqAIJPERM converts a SeqAIJ matrix into a
585: * SeqAIJPERM matrix. This routine is called by the MatCreate_SeqAIJPERM()
586: * routine, but can also be used to convert an assembled SeqAIJ matrix
587: * into a SeqAIJPERM one. */
590: PETSC_EXTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJPERM(Mat A,MatType type,MatReuse reuse,Mat *newmat)591: {
593: Mat B = *newmat;
594: Mat_SeqAIJPERM *aijperm;
597: if (reuse == MAT_INITIAL_MATRIX) {
598: MatDuplicate(A,MAT_COPY_VALUES,&B);
599: }
601: PetscNewLog(B,Mat_SeqAIJPERM,&aijperm);
602: B->spptr = (void*) aijperm;
604: /* Set function pointers for methods that we inherit from AIJ but override. */
605: B->ops->duplicate = MatDuplicate_SeqAIJPERM;
606: B->ops->assemblyend = MatAssemblyEnd_SeqAIJPERM;
607: B->ops->destroy = MatDestroy_SeqAIJPERM;
608: B->ops->mult = MatMult_SeqAIJPERM;
609: B->ops->multadd = MatMultAdd_SeqAIJPERM;
611: /* If A has already been assembled, compute the permutation. */
612: if (A->assembled) {
613: MatSeqAIJPERM_create_perm(B);
614: }
616: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijperm_seqaij_C",MatConvert_SeqAIJPERM_SeqAIJ);
618: PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJPERM);
619: *newmat = B;
620: return(0);
621: }
625: /*@C
626: MatCreateSeqAIJPERM - Creates a sparse matrix of type SEQAIJPERM.
627: This type inherits from AIJ, but calculates some additional permutation
628: information that is used to allow better vectorization of some
629: operations. At the cost of increased storage, the AIJ formatted
630: matrix can be copied to a format in which pieces of the matrix are
631: stored in ELLPACK format, allowing the vectorized matrix multiply
632: routine to use stride-1 memory accesses. As with the AIJ type, it is
633: important to preallocate matrix storage in order to get good assembly
634: performance.
636: Collective on MPI_Comm638: Input Parameters:
639: + comm - MPI communicator, set to PETSC_COMM_SELF640: . m - number of rows
641: . n - number of columns
642: . nz - number of nonzeros per row (same for all rows)
643: - nnz - array containing the number of nonzeros in the various rows
644: (possibly different for each row) or NULL
646: Output Parameter:
647: . A - the matrix
649: Notes:
650: If nnz is given then nz is ignored
652: Level: intermediate
654: .keywords: matrix, cray, sparse, parallel
656: .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues()
657: @*/
658: PetscErrorCodeMatCreateSeqAIJPERM(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)659: {
663: MatCreate(comm,A);
664: MatSetSizes(*A,m,n,m,n);
665: MatSetType(*A,MATSEQAIJPERM);
666: MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
667: return(0);
668: }
672: PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJPERM(Mat A)673: {
677: MatSetType(A,MATSEQAIJ);
678: MatConvert_SeqAIJ_SeqAIJPERM(A,MATSEQAIJPERM,MAT_REUSE_MATRIX,&A);
679: return(0);
680: }