Actual source code: sell.c
petsc-3.12.5 2020-03-29
2: /*
3: Defines the basic matrix operations for the SELL matrix storage format.
4: */
5: #include <../src/mat/impls/sell/seq/sell.h>
6: #include <petscblaslapack.h>
7: #include <petsc/private/kernels/blocktranspose.h>
8: #if defined(PETSC_HAVE_IMMINTRIN_H) && (defined(__AVX512F__) || (defined(__AVX2__) && defined(__FMA__)) || defined(__AVX__)) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
10: #include <immintrin.h>
12: #if !defined(_MM_SCALE_8)
13: #define _MM_SCALE_8 8
14: #endif
16: #if defined(__AVX512F__)
17: /* these do not work
18: vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx);
19: vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval);
20: */
21: #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
22: /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \
23: vec_idx = _mm256_loadu_si256((__m256i const*)acolidx); \
24: vec_vals = _mm512_loadu_pd(aval); \
25: vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \
26: vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y)
27: #elif defined(__AVX2__) && defined(__FMA__)
28: #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
29: vec_vals = _mm256_loadu_pd(aval); \
30: vec_idx = _mm_loadu_si128((__m128i const*)acolidx); /* SSE2 */ \
31: vec_x = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \
32: vec_y = _mm256_fmadd_pd(vec_x,vec_vals,vec_y)
33: #endif
34: #endif /* PETSC_HAVE_IMMINTRIN_H */
36: /*@C
37: MatSeqSELLSetPreallocation - For good matrix assembly performance
38: the user should preallocate the matrix storage by setting the parameter nz
39: (or the array nnz). By setting these parameters accurately, performance
40: during matrix assembly can be increased significantly.
42: Collective
44: Input Parameters:
45: + B - The matrix
46: . nz - number of nonzeros per row (same for all rows)
47: - nnz - array containing the number of nonzeros in the various rows
48: (possibly different for each row) or NULL
50: Notes:
51: If nnz is given then nz is ignored.
53: Specify the preallocated storage with either nz or nnz (not both).
54: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
55: allocation. For large problems you MUST preallocate memory or you
56: will get TERRIBLE performance, see the users' manual chapter on matrices.
58: You can call MatGetInfo() to get information on how effective the preallocation was;
59: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
60: You can also run with the option -info and look for messages with the string
61: malloc in them to see if additional memory allocation was needed.
63: Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix
64: entries or columns indices.
66: The maximum number of nonzeos in any row should be as accurate as possible.
67: If it is underestimated, you will get bad performance due to reallocation
68: (MatSeqXSELLReallocateSELL).
70: Level: intermediate
72: .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo()
74: @*/
75: PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[])
76: {
82: PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));
83: return(0);
84: }
86: PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[])
87: {
88: Mat_SeqSELL *b;
89: PetscInt i,j,totalslices;
90: PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE;
94: if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE;
95: if (maxallocrow == MAT_SKIP_ALLOCATION) {
96: skipallocation = PETSC_TRUE;
97: maxallocrow = 0;
98: }
100: PetscLayoutSetUp(B->rmap);
101: PetscLayoutSetUp(B->cmap);
103: /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */
104: if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5;
105: if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow);
106: if (rlen) {
107: for (i=0; i<B->rmap->n; i++) {
108: if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]);
109: if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n);
110: }
111: }
113: B->preallocated = PETSC_TRUE;
115: b = (Mat_SeqSELL*)B->data;
117: totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */
118: b->totalslices = totalslices;
119: if (!skipallocation) {
120: if (B->rmap->n & 0x07) {PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n);}
122: if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */
123: PetscMalloc1(totalslices+1,&b->sliidx);
124: PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));
125: }
126: if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */
127: if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10;
128: else if (maxallocrow < 0) maxallocrow = 1;
129: for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow;
130: } else {
131: maxallocrow = 0;
132: b->sliidx[0] = 0;
133: for (i=1; i<totalslices; i++) {
134: b->sliidx[i] = 0;
135: for (j=0;j<8;j++) {
136: b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]);
137: }
138: maxallocrow = PetscMax(b->sliidx[i],maxallocrow);
139: PetscIntSumError(b->sliidx[i-1],8*b->sliidx[i],&b->sliidx[i]);
140: }
141: /* last slice */
142: b->sliidx[totalslices] = 0;
143: for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]);
144: maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow);
145: b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices];
146: }
148: /* allocate space for val, colidx, rlen */
149: /* FIXME: should B's old memory be unlogged? */
150: MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);
151: /* FIXME: assuming an element of the bit array takes 8 bits */
152: PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);
153: PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));
154: /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */
155: PetscCalloc1(8*totalslices,&b->rlen);
156: PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));
158: b->singlemalloc = PETSC_TRUE;
159: b->free_val = PETSC_TRUE;
160: b->free_colidx = PETSC_TRUE;
161: } else {
162: b->free_val = PETSC_FALSE;
163: b->free_colidx = PETSC_FALSE;
164: }
166: b->nz = 0;
167: b->maxallocrow = maxallocrow;
168: b->rlenmax = maxallocrow;
169: b->maxallocmat = b->sliidx[totalslices];
170: B->info.nz_unneeded = (double)b->maxallocmat;
171: if (realalloc) {
172: MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
173: }
174: return(0);
175: }
177: PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
178: {
179: Mat_SeqSELL *a = (Mat_SeqSELL*)A->data;
180: PetscInt shift;
183: if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
184: if (nz) *nz = a->rlen[row];
185: shift = a->sliidx[row>>3]+(row&0x07);
186: if (!a->getrowcols) {
189: PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);
190: }
191: if (idx) {
192: PetscInt j;
193: for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j];
194: *idx = a->getrowcols;
195: }
196: if (v) {
197: PetscInt j;
198: for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j];
199: *v = a->getrowvals;
200: }
201: return(0);
202: }
204: PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
205: {
207: return(0);
208: }
210: PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
211: {
212: Mat B;
213: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
214: PetscInt i;
218: if (reuse == MAT_REUSE_MATRIX) {
219: B = *newmat;
220: MatZeroEntries(B);
221: } else {
222: MatCreate(PetscObjectComm((PetscObject)A),&B);
223: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
224: MatSetType(B,MATSEQAIJ);
225: MatSeqAIJSetPreallocation(B,0,a->rlen);
226: }
228: for (i=0; i<A->rmap->n; i++) {
229: PetscInt nz = 0,*cols = NULL;
230: PetscScalar *vals = NULL;
232: MatGetRow_SeqSELL(A,i,&nz,&cols,&vals);
233: MatSetValues(B,1,&i,nz,cols,vals,INSERT_VALUES);
234: MatRestoreRow_SeqSELL(A,i,&nz,&cols,&vals);
235: }
237: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
238: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
239: B->rmap->bs = A->rmap->bs;
241: if (reuse == MAT_INPLACE_MATRIX) {
242: MatHeaderReplace(A,&B);
243: } else {
244: *newmat = B;
245: }
246: return(0);
247: }
249: #include <../src/mat/impls/aij/seq/aij.h>
251: PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
252: {
253: Mat B;
254: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
255: PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols;
256: const PetscInt *cols;
257: const PetscScalar *vals;
258: PetscErrorCode ierr;
262: if (reuse == MAT_REUSE_MATRIX) {
263: B = *newmat;
264: } else {
265: /* Can we just use ilen? */
266: PetscMalloc1(m,&rowlengths);
267: for (i=0; i<m; i++) {
268: rowlengths[i] = ai[i+1] - ai[i];
269: }
271: MatCreate(PetscObjectComm((PetscObject)A),&B);
272: MatSetSizes(B,m,n,m,n);
273: MatSetType(B,MATSEQSELL);
274: MatSeqSELLSetPreallocation(B,0,rowlengths);
275: PetscFree(rowlengths);
276: }
278: for (row=0; row<m; row++) {
279: MatGetRow(A,row,&ncols,&cols,&vals);
280: MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);
281: MatRestoreRow(A,row,&ncols,&cols,&vals);
282: }
283: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
284: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
285: B->rmap->bs = A->rmap->bs;
287: if (reuse == MAT_INPLACE_MATRIX) {
288: MatHeaderReplace(A,&B);
289: } else {
290: *newmat = B;
291: }
292: return(0);
293: }
295: PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy)
296: {
297: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
298: PetscScalar *y;
299: const PetscScalar *x;
300: const MatScalar *aval=a->val;
301: PetscInt totalslices=a->totalslices;
302: const PetscInt *acolidx=a->colidx;
303: PetscInt i,j;
304: PetscErrorCode ierr;
305: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
306: __m512d vec_x,vec_y,vec_vals;
307: __m256i vec_idx;
308: __mmask8 mask;
309: __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
310: __m256i vec_idx2,vec_idx3,vec_idx4;
311: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
312: __m128i vec_idx;
313: __m256d vec_x,vec_y,vec_y2,vec_vals;
314: MatScalar yval;
315: PetscInt r,rows_left,row,nnz_in_row;
316: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
317: __m128d vec_x_tmp;
318: __m256d vec_x,vec_y,vec_y2,vec_vals;
319: MatScalar yval;
320: PetscInt r,rows_left,row,nnz_in_row;
321: #else
322: PetscScalar sum[8];
323: #endif
325: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
326: #pragma disjoint(*x,*y,*aval)
327: #endif
330: VecGetArrayRead(xx,&x);
331: VecGetArray(yy,&y);
332: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
333: for (i=0; i<totalslices; i++) { /* loop over slices */
334: PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
335: PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
337: vec_y = _mm512_setzero_pd();
338: vec_y2 = _mm512_setzero_pd();
339: vec_y3 = _mm512_setzero_pd();
340: vec_y4 = _mm512_setzero_pd();
342: j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
343: switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
344: case 3:
345: AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
346: acolidx += 8; aval += 8;
347: AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
348: acolidx += 8; aval += 8;
349: AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
350: acolidx += 8; aval += 8;
351: j += 3;
352: break;
353: case 2:
354: AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
355: acolidx += 8; aval += 8;
356: AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
357: acolidx += 8; aval += 8;
358: j += 2;
359: break;
360: case 1:
361: AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
362: acolidx += 8; aval += 8;
363: j += 1;
364: break;
365: }
366: #pragma novector
367: for (; j<(a->sliidx[i+1]>>3); j+=4) {
368: AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
369: acolidx += 8; aval += 8;
370: AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
371: acolidx += 8; aval += 8;
372: AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
373: acolidx += 8; aval += 8;
374: AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
375: acolidx += 8; aval += 8;
376: }
378: vec_y = _mm512_add_pd(vec_y,vec_y2);
379: vec_y = _mm512_add_pd(vec_y,vec_y3);
380: vec_y = _mm512_add_pd(vec_y,vec_y4);
381: if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
382: mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
383: _mm512_mask_storeu_pd(&y[8*i],mask,vec_y);
384: } else {
385: _mm512_storeu_pd(&y[8*i],vec_y);
386: }
387: }
388: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
389: for (i=0; i<totalslices; i++) { /* loop over full slices */
390: PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
391: PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
393: /* last slice may have padding rows. Don't use vectorization. */
394: if (i == totalslices-1 && (A->rmap->n & 0x07)) {
395: rows_left = A->rmap->n - 8*i;
396: for (r=0; r<rows_left; ++r) {
397: yval = (MatScalar)0;
398: row = 8*i + r;
399: nnz_in_row = a->rlen[row];
400: for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
401: y[row] = yval;
402: }
403: break;
404: }
406: vec_y = _mm256_setzero_pd();
407: vec_y2 = _mm256_setzero_pd();
409: /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
410: #pragma novector
411: #pragma unroll(2)
412: for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
413: AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
414: aval += 4; acolidx += 4;
415: AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2);
416: aval += 4; acolidx += 4;
417: }
419: _mm256_storeu_pd(y+i*8,vec_y);
420: _mm256_storeu_pd(y+i*8+4,vec_y2);
421: }
422: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
423: for (i=0; i<totalslices; i++) { /* loop over full slices */
424: PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
425: PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
427: vec_y = _mm256_setzero_pd();
428: vec_y2 = _mm256_setzero_pd();
430: /* last slice may have padding rows. Don't use vectorization. */
431: if (i == totalslices-1 && (A->rmap->n & 0x07)) {
432: rows_left = A->rmap->n - 8*i;
433: for (r=0; r<rows_left; ++r) {
434: yval = (MatScalar)0;
435: row = 8*i + r;
436: nnz_in_row = a->rlen[row];
437: for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]];
438: y[row] = yval;
439: }
440: break;
441: }
443: /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
444: #pragma novector
445: #pragma unroll(2)
446: for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
447: vec_vals = _mm256_loadu_pd(aval);
448: vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
449: vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
450: vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
451: vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
452: vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
453: vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
454: vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
455: aval += 4;
457: vec_vals = _mm256_loadu_pd(aval);
458: vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
459: vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
460: vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
461: vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
462: vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
463: vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
464: vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
465: aval += 4;
466: }
468: _mm256_storeu_pd(y + i*8, vec_y);
469: _mm256_storeu_pd(y + i*8 + 4, vec_y2);
470: }
471: #else
472: for (i=0; i<totalslices; i++) { /* loop over slices */
473: for (j=0; j<8; j++) sum[j] = 0.0;
474: for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
475: sum[0] += aval[j] * x[acolidx[j]];
476: sum[1] += aval[j+1] * x[acolidx[j+1]];
477: sum[2] += aval[j+2] * x[acolidx[j+2]];
478: sum[3] += aval[j+3] * x[acolidx[j+3]];
479: sum[4] += aval[j+4] * x[acolidx[j+4]];
480: sum[5] += aval[j+5] * x[acolidx[j+5]];
481: sum[6] += aval[j+6] * x[acolidx[j+6]];
482: sum[7] += aval[j+7] * x[acolidx[j+7]];
483: }
484: if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
485: for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j];
486: } else {
487: for(j=0; j<8; j++) y[8*i+j] = sum[j];
488: }
489: }
490: #endif
492: PetscLogFlops(2.0*a->nz-a->nonzerorowcnt); /* theoretical minimal FLOPs */
493: VecRestoreArrayRead(xx,&x);
494: VecRestoreArray(yy,&y);
495: return(0);
496: }
498: #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h>
499: PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz)
500: {
501: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
502: PetscScalar *y,*z;
503: const PetscScalar *x;
504: const MatScalar *aval=a->val;
505: PetscInt totalslices=a->totalslices;
506: const PetscInt *acolidx=a->colidx;
507: PetscInt i,j;
508: PetscErrorCode ierr;
509: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
510: __m512d vec_x,vec_y,vec_vals;
511: __m256i vec_idx;
512: __mmask8 mask;
513: __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
514: __m256i vec_idx2,vec_idx3,vec_idx4;
515: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
516: __m128d vec_x_tmp;
517: __m256d vec_x,vec_y,vec_y2,vec_vals;
518: MatScalar yval;
519: PetscInt r,row,nnz_in_row;
520: #else
521: PetscScalar sum[8];
522: #endif
524: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
525: #pragma disjoint(*x,*y,*aval)
526: #endif
529: VecGetArrayRead(xx,&x);
530: VecGetArrayPair(yy,zz,&y,&z);
531: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
532: for (i=0; i<totalslices; i++) { /* loop over slices */
533: PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
534: PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
536: if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
537: mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
538: vec_y = _mm512_mask_loadu_pd(vec_y,mask,&y[8*i]);
539: } else {
540: vec_y = _mm512_loadu_pd(&y[8*i]);
541: }
542: vec_y2 = _mm512_setzero_pd();
543: vec_y3 = _mm512_setzero_pd();
544: vec_y4 = _mm512_setzero_pd();
546: j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
547: switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
548: case 3:
549: AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
550: acolidx += 8; aval += 8;
551: AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
552: acolidx += 8; aval += 8;
553: AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
554: acolidx += 8; aval += 8;
555: j += 3;
556: break;
557: case 2:
558: AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
559: acolidx += 8; aval += 8;
560: AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
561: acolidx += 8; aval += 8;
562: j += 2;
563: break;
564: case 1:
565: AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
566: acolidx += 8; aval += 8;
567: j += 1;
568: break;
569: }
570: #pragma novector
571: for (; j<(a->sliidx[i+1]>>3); j+=4) {
572: AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
573: acolidx += 8; aval += 8;
574: AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
575: acolidx += 8; aval += 8;
576: AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
577: acolidx += 8; aval += 8;
578: AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
579: acolidx += 8; aval += 8;
580: }
582: vec_y = _mm512_add_pd(vec_y,vec_y2);
583: vec_y = _mm512_add_pd(vec_y,vec_y3);
584: vec_y = _mm512_add_pd(vec_y,vec_y4);
585: if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
586: _mm512_mask_storeu_pd(&z[8*i],mask,vec_y);
587: } else {
588: _mm512_storeu_pd(&z[8*i],vec_y);
589: }
590: }
591: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
592: for (i=0; i<totalslices; i++) { /* loop over full slices */
593: PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
594: PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
596: /* last slice may have padding rows. Don't use vectorization. */
597: if (i == totalslices-1 && (A->rmap->n & 0x07)) {
598: for (r=0; r<(A->rmap->n & 0x07); ++r) {
599: row = 8*i + r;
600: yval = (MatScalar)0.0;
601: nnz_in_row = a->rlen[row];
602: for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
603: z[row] = y[row] + yval;
604: }
605: break;
606: }
608: vec_y = _mm256_loadu_pd(y+8*i);
609: vec_y2 = _mm256_loadu_pd(y+8*i+4);
611: /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
612: for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
613: vec_vals = _mm256_loadu_pd(aval);
614: vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
615: vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
616: vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
617: vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
618: vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
619: vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
620: vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
621: aval += 4;
623: vec_vals = _mm256_loadu_pd(aval);
624: vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
625: vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
626: vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
627: vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
628: vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
629: vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
630: vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
631: aval += 4;
632: }
634: _mm256_storeu_pd(z+i*8,vec_y);
635: _mm256_storeu_pd(z+i*8+4,vec_y2);
636: }
637: #else
638: for (i=0; i<totalslices; i++) { /* loop over slices */
639: for (j=0; j<8; j++) sum[j] = 0.0;
640: for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
641: sum[0] += aval[j] * x[acolidx[j]];
642: sum[1] += aval[j+1] * x[acolidx[j+1]];
643: sum[2] += aval[j+2] * x[acolidx[j+2]];
644: sum[3] += aval[j+3] * x[acolidx[j+3]];
645: sum[4] += aval[j+4] * x[acolidx[j+4]];
646: sum[5] += aval[j+5] * x[acolidx[j+5]];
647: sum[6] += aval[j+6] * x[acolidx[j+6]];
648: sum[7] += aval[j+7] * x[acolidx[j+7]];
649: }
650: if (i == totalslices-1 && (A->rmap->n & 0x07)) {
651: for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j];
652: } else {
653: for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j];
654: }
655: }
656: #endif
658: PetscLogFlops(2.0*a->nz);
659: VecRestoreArrayRead(xx,&x);
660: VecRestoreArrayPair(yy,zz,&y,&z);
661: return(0);
662: }
664: PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy)
665: {
666: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
667: PetscScalar *y;
668: const PetscScalar *x;
669: const MatScalar *aval=a->val;
670: const PetscInt *acolidx=a->colidx;
671: PetscInt i,j,r,row,nnz_in_row,totalslices=a->totalslices;
672: PetscErrorCode ierr;
674: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
675: #pragma disjoint(*x,*y,*aval)
676: #endif
679: if (A->symmetric) {
680: MatMultAdd_SeqSELL(A,xx,zz,yy);
681: return(0);
682: }
683: if (zz != yy) { VecCopy(zz,yy); }
684: VecGetArrayRead(xx,&x);
685: VecGetArray(yy,&y);
686: for (i=0; i<a->totalslices; i++) { /* loop over slices */
687: if (i == totalslices-1 && (A->rmap->n & 0x07)) {
688: for (r=0; r<(A->rmap->n & 0x07); ++r) {
689: row = 8*i + r;
690: nnz_in_row = a->rlen[row];
691: for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row];
692: }
693: break;
694: }
695: for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
696: y[acolidx[j]] += aval[j] * x[8*i];
697: y[acolidx[j+1]] += aval[j+1] * x[8*i+1];
698: y[acolidx[j+2]] += aval[j+2] * x[8*i+2];
699: y[acolidx[j+3]] += aval[j+3] * x[8*i+3];
700: y[acolidx[j+4]] += aval[j+4] * x[8*i+4];
701: y[acolidx[j+5]] += aval[j+5] * x[8*i+5];
702: y[acolidx[j+6]] += aval[j+6] * x[8*i+6];
703: y[acolidx[j+7]] += aval[j+7] * x[8*i+7];
704: }
705: }
706: PetscLogFlops(2.0*a->sliidx[a->totalslices]);
707: VecRestoreArrayRead(xx,&x);
708: VecRestoreArray(yy,&y);
709: return(0);
710: }
712: PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy)
713: {
717: if (A->symmetric) {
718: MatMult_SeqSELL(A,xx,yy);
719: } else {
720: VecSet(yy,0.0);
721: MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);
722: }
723: return(0);
724: }
726: /*
727: Checks for missing diagonals
728: */
729: PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d)
730: {
731: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
732: PetscInt *diag,i;
736: *missing = PETSC_FALSE;
737: if (A->rmap->n > 0 && !(a->colidx)) {
738: *missing = PETSC_TRUE;
739: if (d) *d = 0;
740: PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
741: } else {
742: diag = a->diag;
743: for (i=0; i<A->rmap->n; i++) {
744: if (diag[i] == -1) {
745: *missing = PETSC_TRUE;
746: if (d) *d = i;
747: PetscInfo1(A,"Matrix is missing diagonal number %D\n",i);
748: break;
749: }
750: }
751: }
752: return(0);
753: }
755: PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A)
756: {
757: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
758: PetscInt i,j,m=A->rmap->n,shift;
762: if (!a->diag) {
763: PetscMalloc1(m,&a->diag);
764: PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));
765: a->free_diag = PETSC_TRUE;
766: }
767: for (i=0; i<m; i++) { /* loop over rows */
768: shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
769: a->diag[i] = -1;
770: for (j=0; j<a->rlen[i]; j++) {
771: if (a->colidx[shift+j*8] == i) {
772: a->diag[i] = shift+j*8;
773: break;
774: }
775: }
776: }
777: return(0);
778: }
780: /*
781: Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways
782: */
783: PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift)
784: {
785: Mat_SeqSELL *a=(Mat_SeqSELL*) A->data;
786: PetscInt i,*diag,m = A->rmap->n;
787: MatScalar *val = a->val;
788: PetscScalar *idiag,*mdiag;
792: if (a->idiagvalid) return(0);
793: MatMarkDiagonal_SeqSELL(A);
794: diag = a->diag;
795: if (!a->idiag) {
796: PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);
797: PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));
798: val = a->val;
799: }
800: mdiag = a->mdiag;
801: idiag = a->idiag;
803: if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) {
804: for (i=0; i<m; i++) {
805: mdiag[i] = val[diag[i]];
806: if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */
807: if (PetscRealPart(fshift)) {
808: PetscInfo1(A,"Zero diagonal on row %D\n",i);
809: A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
810: A->factorerror_zeropivot_value = 0.0;
811: A->factorerror_zeropivot_row = i;
812: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i);
813: }
814: idiag[i] = 1.0/val[diag[i]];
815: }
816: PetscLogFlops(m);
817: } else {
818: for (i=0; i<m; i++) {
819: mdiag[i] = val[diag[i]];
820: idiag[i] = omega/(fshift + val[diag[i]]);
821: }
822: PetscLogFlops(2.0*m);
823: }
824: a->idiagvalid = PETSC_TRUE;
825: return(0);
826: }
828: PetscErrorCode MatZeroEntries_SeqSELL(Mat A)
829: {
830: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
834: PetscArrayzero(a->val,a->sliidx[a->totalslices]);
835: MatSeqSELLInvalidateDiagonal(A);
836: return(0);
837: }
839: PetscErrorCode MatDestroy_SeqSELL(Mat A)
840: {
841: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
845: #if defined(PETSC_USE_LOG)
846: PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz);
847: #endif
848: MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);
849: ISDestroy(&a->row);
850: ISDestroy(&a->col);
851: PetscFree(a->diag);
852: PetscFree(a->rlen);
853: PetscFree(a->sliidx);
854: PetscFree3(a->idiag,a->mdiag,a->ssor_work);
855: PetscFree(a->solve_work);
856: ISDestroy(&a->icol);
857: PetscFree(a->saved_values);
858: PetscFree2(a->getrowcols,a->getrowvals);
860: PetscFree(A->data);
862: PetscObjectChangeTypeName((PetscObject)A,0);
863: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
864: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
865: #if defined(PETSC_HAVE_ELEMENTAL)
866: #endif
867: PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);
868: return(0);
869: }
871: PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg)
872: {
873: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
877: switch (op) {
878: case MAT_ROW_ORIENTED:
879: a->roworiented = flg;
880: break;
881: case MAT_KEEP_NONZERO_PATTERN:
882: a->keepnonzeropattern = flg;
883: break;
884: case MAT_NEW_NONZERO_LOCATIONS:
885: a->nonew = (flg ? 0 : 1);
886: break;
887: case MAT_NEW_NONZERO_LOCATION_ERR:
888: a->nonew = (flg ? -1 : 0);
889: break;
890: case MAT_NEW_NONZERO_ALLOCATION_ERR:
891: a->nonew = (flg ? -2 : 0);
892: break;
893: case MAT_UNUSED_NONZERO_LOCATION_ERR:
894: a->nounused = (flg ? -1 : 0);
895: break;
896: case MAT_NEW_DIAGONALS:
897: case MAT_IGNORE_OFF_PROC_ENTRIES:
898: case MAT_USE_HASH_TABLE:
899: case MAT_SORTED_FULL:
900: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
901: break;
902: case MAT_SPD:
903: case MAT_SYMMETRIC:
904: case MAT_STRUCTURALLY_SYMMETRIC:
905: case MAT_HERMITIAN:
906: case MAT_SYMMETRY_ETERNAL:
907: /* These options are handled directly by MatSetOption() */
908: break;
909: default:
910: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
911: }
912: return(0);
913: }
915: PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v)
916: {
917: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
918: PetscInt i,j,n,shift;
919: PetscScalar *x,zero=0.0;
923: VecGetLocalSize(v,&n);
924: if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
926: if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) {
927: PetscInt *diag=a->diag;
928: VecGetArray(v,&x);
929: for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]];
930: VecRestoreArray(v,&x);
931: return(0);
932: }
934: VecSet(v,zero);
935: VecGetArray(v,&x);
936: for (i=0; i<n; i++) { /* loop over rows */
937: shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
938: x[i] = 0;
939: for (j=0; j<a->rlen[i]; j++) {
940: if (a->colidx[shift+j*8] == i) {
941: x[i] = a->val[shift+j*8];
942: break;
943: }
944: }
945: }
946: VecRestoreArray(v,&x);
947: return(0);
948: }
950: PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr)
951: {
952: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
953: const PetscScalar *l,*r;
954: PetscInt i,j,m,n,row;
955: PetscErrorCode ierr;
958: if (ll) {
959: /* The local size is used so that VecMPI can be passed to this routine
960: by MatDiagonalScale_MPISELL */
961: VecGetLocalSize(ll,&m);
962: if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
963: VecGetArrayRead(ll,&l);
964: for (i=0; i<a->totalslices; i++) { /* loop over slices */
965: if (i == a->totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
966: for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
967: if (row < (A->rmap->n & 0x07)) a->val[j] *= l[8*i+row];
968: }
969: } else {
970: for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
971: a->val[j] *= l[8*i+row];
972: }
973: }
974: }
975: VecRestoreArrayRead(ll,&l);
976: PetscLogFlops(a->nz);
977: }
978: if (rr) {
979: VecGetLocalSize(rr,&n);
980: if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
981: VecGetArrayRead(rr,&r);
982: for (i=0; i<a->totalslices; i++) { /* loop over slices */
983: if (i == a->totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
984: for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
985: if (row < (A->rmap->n & 0x07)) a->val[j] *= r[a->colidx[j]];
986: }
987: } else {
988: for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) {
989: a->val[j] *= r[a->colidx[j]];
990: }
991: }
992: }
993: VecRestoreArrayRead(rr,&r);
994: PetscLogFlops(a->nz);
995: }
996: MatSeqSELLInvalidateDiagonal(A);
997: return(0);
998: }
1000: extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1002: PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1003: {
1004: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1005: PetscInt *cp,i,k,low,high,t,row,col,l;
1006: PetscInt shift;
1007: MatScalar *vp;
1010: for (k=0; k<m; k++) { /* loop over requested rows */
1011: row = im[k];
1012: if (row<0) continue;
1013: #if defined(PETSC_USE_DEBUG)
1014: if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
1015: #endif
1016: shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1017: cp = a->colidx+shift; /* pointer to the row */
1018: vp = a->val+shift; /* pointer to the row */
1019: for (l=0; l<n; l++) { /* loop over requested columns */
1020: col = in[l];
1021: if (col<0) continue;
1022: #if defined(PETSC_USE_DEBUG)
1023: if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1);
1024: #endif
1025: high = a->rlen[row]; low = 0; /* assume unsorted */
1026: while (high-low > 5) {
1027: t = (low+high)/2;
1028: if (*(cp+t*8) > col) high = t;
1029: else low = t;
1030: }
1031: for (i=low; i<high; i++) {
1032: if (*(cp+8*i) > col) break;
1033: if (*(cp+8*i) == col) {
1034: *v++ = *(vp+8*i);
1035: goto finished;
1036: }
1037: }
1038: *v++ = 0.0;
1039: finished:;
1040: }
1041: }
1042: return(0);
1043: }
1045: PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer)
1046: {
1047: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1048: PetscInt i,j,m=A->rmap->n,shift;
1049: const char *name;
1050: PetscViewerFormat format;
1051: PetscErrorCode ierr;
1054: PetscViewerGetFormat(viewer,&format);
1055: if (format == PETSC_VIEWER_ASCII_MATLAB) {
1056: PetscInt nofinalvalue = 0;
1057: /*
1058: if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) {
1059: nofinalvalue = 1;
1060: }
1061: */
1062: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1063: PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);
1064: PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);
1065: #if defined(PETSC_USE_COMPLEX)
1066: PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);
1067: #else
1068: PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);
1069: #endif
1070: PetscViewerASCIIPrintf(viewer,"zzz = [\n");
1072: for (i=0; i<m; i++) {
1073: shift = a->sliidx[i>>3]+(i&0x07);
1074: for (j=0; j<a->rlen[i]; j++) {
1075: #if defined(PETSC_USE_COMPLEX)
1076: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));
1077: #else
1078: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);
1079: #endif
1080: }
1081: }
1082: /*
1083: if (nofinalvalue) {
1084: #if defined(PETSC_USE_COMPLEX)
1085: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",m,A->cmap->n,0.,0.);
1086: #else
1087: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",m,A->cmap->n,0.0);
1088: #endif
1089: }
1090: */
1091: PetscObjectGetName((PetscObject)A,&name);
1092: PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);
1093: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1094: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
1095: return(0);
1096: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1097: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1098: for (i=0; i<m; i++) {
1099: PetscViewerASCIIPrintf(viewer,"row %D:",i);
1100: shift = a->sliidx[i>>3]+(i&0x07);
1101: for (j=0; j<a->rlen[i]; j++) {
1102: #if defined(PETSC_USE_COMPLEX)
1103: if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1104: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));
1105: } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1106: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));
1107: } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) {
1108: PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));
1109: }
1110: #else
1111: if (a->val[shift+8*j] != 0.0) {PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);}
1112: #endif
1113: }
1114: PetscViewerASCIIPrintf(viewer,"\n");
1115: }
1116: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1117: } else if (format == PETSC_VIEWER_ASCII_DENSE) {
1118: PetscInt cnt=0,jcnt;
1119: PetscScalar value;
1120: #if defined(PETSC_USE_COMPLEX)
1121: PetscBool realonly=PETSC_TRUE;
1122: for (i=0; i<a->sliidx[a->totalslices]; i++) {
1123: if (PetscImaginaryPart(a->val[i]) != 0.0) {
1124: realonly = PETSC_FALSE;
1125: break;
1126: }
1127: }
1128: #endif
1130: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1131: for (i=0; i<m; i++) {
1132: jcnt = 0;
1133: shift = a->sliidx[i>>3]+(i&0x07);
1134: for (j=0; j<A->cmap->n; j++) {
1135: if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) {
1136: value = a->val[cnt++];
1137: jcnt++;
1138: } else {
1139: value = 0.0;
1140: }
1141: #if defined(PETSC_USE_COMPLEX)
1142: if (realonly) {
1143: PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));
1144: } else {
1145: PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));
1146: }
1147: #else
1148: PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);
1149: #endif
1150: }
1151: PetscViewerASCIIPrintf(viewer,"\n");
1152: }
1153: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1154: } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) {
1155: PetscInt fshift=1;
1156: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1157: #if defined(PETSC_USE_COMPLEX)
1158: PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");
1159: #else
1160: PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");
1161: #endif
1162: PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);
1163: for (i=0; i<m; i++) {
1164: shift = a->sliidx[i>>3]+(i&0x07);
1165: for (j=0; j<a->rlen[i]; j++) {
1166: #if defined(PETSC_USE_COMPLEX)
1167: PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));
1168: #else
1169: PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);
1170: #endif
1171: }
1172: }
1173: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1174: } else if (format == PETSC_VIEWER_NATIVE) {
1175: for (i=0; i<a->totalslices; i++) { /* loop over slices */
1176: PetscInt row;
1177: PetscViewerASCIIPrintf(viewer,"slice %D: %D %D\n",i,a->sliidx[i],a->sliidx[i+1]);
1178: for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
1179: #if defined(PETSC_USE_COMPLEX)
1180: if (PetscImaginaryPart(a->val[j]) > 0.0) {
1181: PetscViewerASCIIPrintf(viewer," %D %D %g + %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));
1182: } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1183: PetscViewerASCIIPrintf(viewer," %D %D %g - %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),-(double)PetscImaginaryPart(a->val[j]));
1184: } else {
1185: PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]));
1186: }
1187: #else
1188: PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)a->val[j]);
1189: #endif
1190: }
1191: }
1192: } else {
1193: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1194: if (A->factortype) {
1195: for (i=0; i<m; i++) {
1196: shift = a->sliidx[i>>3]+(i&0x07);
1197: PetscViewerASCIIPrintf(viewer,"row %D:",i);
1198: /* L part */
1199: for (j=shift; j<a->diag[i]; j+=8) {
1200: #if defined(PETSC_USE_COMPLEX)
1201: if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) {
1202: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));
1203: } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) {
1204: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));
1205: } else {
1206: PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));
1207: }
1208: #else
1209: PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);
1210: #endif
1211: }
1212: /* diagonal */
1213: j = a->diag[i];
1214: #if defined(PETSC_USE_COMPLEX)
1215: if (PetscImaginaryPart(a->val[j]) > 0.0) {
1216: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));
1217: } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1218: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));
1219: } else {
1220: PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));
1221: }
1222: #else
1223: PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));
1224: #endif
1226: /* U part */
1227: for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) {
1228: #if defined(PETSC_USE_COMPLEX)
1229: if (PetscImaginaryPart(a->val[j]) > 0.0) {
1230: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));
1231: } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1232: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));
1233: } else {
1234: PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));
1235: }
1236: #else
1237: PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);
1238: #endif
1239: }
1240: PetscViewerASCIIPrintf(viewer,"\n");
1241: }
1242: } else {
1243: for (i=0; i<m; i++) {
1244: shift = a->sliidx[i>>3]+(i&0x07);
1245: PetscViewerASCIIPrintf(viewer,"row %D:",i);
1246: for (j=0; j<a->rlen[i]; j++) {
1247: #if defined(PETSC_USE_COMPLEX)
1248: if (PetscImaginaryPart(a->val[j]) > 0.0) {
1249: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));
1250: } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1251: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));
1252: } else {
1253: PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));
1254: }
1255: #else
1256: PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);
1257: #endif
1258: }
1259: PetscViewerASCIIPrintf(viewer,"\n");
1260: }
1261: }
1262: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1263: }
1264: PetscViewerFlush(viewer);
1265: return(0);
1266: }
1268: #include <petscdraw.h>
1269: PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa)
1270: {
1271: Mat A=(Mat)Aa;
1272: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1273: PetscInt i,j,m=A->rmap->n,shift;
1274: int color;
1275: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1276: PetscViewer viewer;
1277: PetscViewerFormat format;
1278: PetscErrorCode ierr;
1281: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1282: PetscViewerGetFormat(viewer,&format);
1283: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1285: /* loop over matrix elements drawing boxes */
1287: if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1288: PetscDrawCollectiveBegin(draw);
1289: /* Blue for negative, Cyan for zero and Red for positive */
1290: color = PETSC_DRAW_BLUE;
1291: for (i=0; i<m; i++) {
1292: shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1293: y_l = m - i - 1.0; y_r = y_l + 1.0;
1294: for (j=0; j<a->rlen[i]; j++) {
1295: x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1296: if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue;
1297: PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);
1298: }
1299: }
1300: color = PETSC_DRAW_CYAN;
1301: for (i=0; i<m; i++) {
1302: shift = a->sliidx[i>>3]+(i&0x07);
1303: y_l = m - i - 1.0; y_r = y_l + 1.0;
1304: for (j=0; j<a->rlen[i]; j++) {
1305: x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1306: if (a->val[shift+8*j] != 0.) continue;
1307: PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);
1308: }
1309: }
1310: color = PETSC_DRAW_RED;
1311: for (i=0; i<m; i++) {
1312: shift = a->sliidx[i>>3]+(i&0x07);
1313: y_l = m - i - 1.0; y_r = y_l + 1.0;
1314: for (j=0; j<a->rlen[i]; j++) {
1315: x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1316: if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue;
1317: PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);
1318: }
1319: }
1320: PetscDrawCollectiveEnd(draw);
1321: } else {
1322: /* use contour shading to indicate magnitude of values */
1323: /* first determine max of all nonzero values */
1324: PetscReal minv=0.0,maxv=0.0;
1325: PetscInt count=0;
1326: PetscDraw popup;
1327: for (i=0; i<a->sliidx[a->totalslices]; i++) {
1328: if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]);
1329: }
1330: if (minv >= maxv) maxv = minv + PETSC_SMALL;
1331: PetscDrawGetPopup(draw,&popup);
1332: PetscDrawScalePopup(popup,minv,maxv);
1334: PetscDrawCollectiveBegin(draw);
1335: for (i=0; i<m; i++) {
1336: shift = a->sliidx[i>>3]+(i&0x07);
1337: y_l = m - i - 1.0;
1338: y_r = y_l + 1.0;
1339: for (j=0; j<a->rlen[i]; j++) {
1340: x_l = a->colidx[shift+j*8];
1341: x_r = x_l + 1.0;
1342: color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv);
1343: PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);
1344: count++;
1345: }
1346: }
1347: PetscDrawCollectiveEnd(draw);
1348: }
1349: return(0);
1350: }
1352: #include <petscdraw.h>
1353: PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer)
1354: {
1355: PetscDraw draw;
1356: PetscReal xr,yr,xl,yl,h,w;
1357: PetscBool isnull;
1361: PetscViewerDrawGetDraw(viewer,0,&draw);
1362: PetscDrawIsNull(draw,&isnull);
1363: if (isnull) return(0);
1365: xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0;
1366: xr += w; yr += h; xl = -w; yl = -h;
1367: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1368: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1369: PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);
1370: PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1371: PetscDrawSave(draw);
1372: return(0);
1373: }
1375: PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer)
1376: {
1377: PetscBool iascii,isbinary,isdraw;
1381: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1382: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1383: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1384: if (iascii) {
1385: MatView_SeqSELL_ASCII(A,viewer);
1386: } else if (isbinary) {
1387: /* MatView_SeqSELL_Binary(A,viewer); */
1388: } else if (isdraw) {
1389: MatView_SeqSELL_Draw(A,viewer);
1390: }
1391: return(0);
1392: }
1394: PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode)
1395: {
1396: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1397: PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k;
1398: MatScalar *vp;
1402: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1403: /* To do: compress out the unused elements */
1404: MatMarkDiagonal_SeqSELL(A);
1405: PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);
1406: PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);
1407: PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);
1408: /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */
1409: for (i=0; i<a->totalslices; ++i) {
1410: shift = a->sliidx[i]; /* starting index of the slice */
1411: cp = a->colidx+shift; /* pointer to the column indices of the slice */
1412: vp = a->val+shift; /* pointer to the nonzero values of the slice */
1413: for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */
1414: row = 8*i + row_in_slice;
1415: nrow = a->rlen[row]; /* number of nonzeros in row */
1416: /*
1417: Search for the nearest nonzero. Normally setting the index to zero may cause extra communication.
1418: But if the entire slice are empty, it is fine to use 0 since the index will not be loaded.
1419: */
1420: lastcol = 0;
1421: if (nrow>0) { /* nonempty row */
1422: lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */
1423: } else if (!row_in_slice) { /* first row of the currect slice is empty */
1424: for (j=1;j<8;j++) {
1425: if (a->rlen[8*i+j]) {
1426: lastcol = cp[j];
1427: break;
1428: }
1429: }
1430: } else {
1431: if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */
1432: }
1434: for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) {
1435: cp[8*k+row_in_slice] = lastcol;
1436: vp[8*k+row_in_slice] = (MatScalar)0;
1437: }
1438: }
1439: }
1441: A->info.mallocs += a->reallocs;
1442: a->reallocs = 0;
1444: MatSeqSELLInvalidateDiagonal(A);
1445: return(0);
1446: }
1448: PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info)
1449: {
1450: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1453: info->block_size = 1.0;
1454: info->nz_allocated = a->maxallocmat;
1455: info->nz_used = a->sliidx[a->totalslices]; /* include padding zeros */
1456: info->nz_unneeded = (a->maxallocmat-a->sliidx[a->totalslices]);
1457: info->assemblies = A->num_ass;
1458: info->mallocs = A->info.mallocs;
1459: info->memory = ((PetscObject)A)->mem;
1460: if (A->factortype) {
1461: info->fill_ratio_given = A->info.fill_ratio_given;
1462: info->fill_ratio_needed = A->info.fill_ratio_needed;
1463: info->factor_mallocs = A->info.factor_mallocs;
1464: } else {
1465: info->fill_ratio_given = 0;
1466: info->fill_ratio_needed = 0;
1467: info->factor_mallocs = 0;
1468: }
1469: return(0);
1470: }
1472: PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1473: {
1474: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1475: PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow;
1476: PetscInt *cp,nonew=a->nonew,lastcol=-1;
1477: MatScalar *vp,value;
1481: for (k=0; k<m; k++) { /* loop over added rows */
1482: row = im[k];
1483: if (row < 0) continue;
1484: #if defined(PETSC_USE_DEBUG)
1485: if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
1486: #endif
1487: shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1488: cp = a->colidx+shift; /* pointer to the row */
1489: vp = a->val+shift; /* pointer to the row */
1490: nrow = a->rlen[row];
1491: low = 0;
1492: high = nrow;
1494: for (l=0; l<n; l++) { /* loop over added columns */
1495: col = in[l];
1496: if (col<0) continue;
1497: #if defined(PETSC_USE_DEBUG)
1498: if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1);
1499: #endif
1500: if (a->roworiented) {
1501: value = v[l+k*n];
1502: } else {
1503: value = v[k+l*m];
1504: }
1505: if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue;
1507: /* search in this row for the specified colmun, i indicates the column to be set */
1508: if (col <= lastcol) low = 0;
1509: else high = nrow;
1510: lastcol = col;
1511: while (high-low > 5) {
1512: t = (low+high)/2;
1513: if (*(cp+t*8) > col) high = t;
1514: else low = t;
1515: }
1516: for (i=low; i<high; i++) {
1517: if (*(cp+i*8) > col) break;
1518: if (*(cp+i*8) == col) {
1519: if (is == ADD_VALUES) *(vp+i*8) += value;
1520: else *(vp+i*8) = value;
1521: low = i + 1;
1522: goto noinsert;
1523: }
1524: }
1525: if (value == 0.0 && a->ignorezeroentries) goto noinsert;
1526: if (nonew == 1) goto noinsert;
1527: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1528: /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */
1529: MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar);
1530: /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */
1531: for (ii=nrow-1; ii>=i; ii--) {
1532: *(cp+(ii+1)*8) = *(cp+ii*8);
1533: *(vp+(ii+1)*8) = *(vp+ii*8);
1534: }
1535: a->rlen[row]++;
1536: *(cp+i*8) = col;
1537: *(vp+i*8) = value;
1538: a->nz++;
1539: A->nonzerostate++;
1540: low = i+1; high++; nrow++;
1541: noinsert:;
1542: }
1543: a->rlen[row] = nrow;
1544: }
1545: return(0);
1546: }
1548: PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str)
1549: {
1553: /* If the two matrices have the same copy implementation, use fast copy. */
1554: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1555: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1556: Mat_SeqSELL *b=(Mat_SeqSELL*)B->data;
1558: if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1559: PetscArraycpy(b->val,a->val,a->sliidx[a->totalslices]);
1560: } else {
1561: MatCopy_Basic(A,B,str);
1562: }
1563: return(0);
1564: }
1566: PetscErrorCode MatSetUp_SeqSELL(Mat A)
1567: {
1571: MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);
1572: return(0);
1573: }
1575: PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[])
1576: {
1577: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1580: *array = a->val;
1581: return(0);
1582: }
1584: PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[])
1585: {
1587: return(0);
1588: }
1590: PetscErrorCode MatRealPart_SeqSELL(Mat A)
1591: {
1592: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1593: PetscInt i;
1594: MatScalar *aval=a->val;
1597: for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]);
1598: return(0);
1599: }
1601: PetscErrorCode MatImaginaryPart_SeqSELL(Mat A)
1602: {
1603: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1604: PetscInt i;
1605: MatScalar *aval=a->val;
1609: for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]);
1610: MatSeqSELLInvalidateDiagonal(A);
1611: return(0);
1612: }
1614: PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha)
1615: {
1616: Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data;
1617: MatScalar *aval=a->val;
1618: PetscScalar oalpha=alpha;
1619: PetscBLASInt one=1,size;
1623: PetscBLASIntCast(a->sliidx[a->totalslices],&size);
1624: PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one));
1625: PetscLogFlops(a->nz);
1626: MatSeqSELLInvalidateDiagonal(inA);
1627: return(0);
1628: }
1630: PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a)
1631: {
1632: Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data;
1636: if (!Y->preallocated || !y->nz) {
1637: MatSeqSELLSetPreallocation(Y,1,NULL);
1638: }
1639: MatShift_Basic(Y,a);
1640: return(0);
1641: }
1643: PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1644: {
1645: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1646: PetscScalar *x,sum,*t;
1647: const MatScalar *idiag=0,*mdiag;
1648: const PetscScalar *b,*xb;
1649: PetscInt n,m=A->rmap->n,i,j,shift;
1650: const PetscInt *diag;
1651: PetscErrorCode ierr;
1654: its = its*lits;
1656: if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */
1657: if (!a->idiagvalid) {MatInvertDiagonal_SeqSELL(A,omega,fshift);}
1658: a->fshift = fshift;
1659: a->omega = omega;
1661: diag = a->diag;
1662: t = a->ssor_work;
1663: idiag = a->idiag;
1664: mdiag = a->mdiag;
1666: VecGetArray(xx,&x);
1667: VecGetArrayRead(bb,&b);
1668: /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */
1669: if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented");
1670: if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented");
1671: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
1673: if (flag & SOR_ZERO_INITIAL_GUESS) {
1674: if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1675: for (i=0; i<m; i++) {
1676: shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1677: sum = b[i];
1678: n = (diag[i]-shift)/8;
1679: for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1680: t[i] = sum;
1681: x[i] = sum*idiag[i];
1682: }
1683: xb = t;
1684: PetscLogFlops(a->nz);
1685: } else xb = b;
1686: if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1687: for (i=m-1; i>=0; i--) {
1688: shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1689: sum = xb[i];
1690: n = a->rlen[i]-(diag[i]-shift)/8-1;
1691: for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1692: if (xb == b) {
1693: x[i] = sum*idiag[i];
1694: } else {
1695: x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */
1696: }
1697: }
1698: PetscLogFlops(a->nz); /* assumes 1/2 in upper */
1699: }
1700: its--;
1701: }
1702: while (its--) {
1703: if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1704: for (i=0; i<m; i++) {
1705: /* lower */
1706: shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1707: sum = b[i];
1708: n = (diag[i]-shift)/8;
1709: for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1710: t[i] = sum; /* save application of the lower-triangular part */
1711: /* upper */
1712: n = a->rlen[i]-(diag[i]-shift)/8-1;
1713: for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1714: x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */
1715: }
1716: xb = t;
1717: PetscLogFlops(2.0*a->nz);
1718: } else xb = b;
1719: if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1720: for (i=m-1; i>=0; i--) {
1721: shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1722: sum = xb[i];
1723: if (xb == b) {
1724: /* whole matrix (no checkpointing available) */
1725: n = a->rlen[i];
1726: for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1727: x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i];
1728: } else { /* lower-triangular part has been saved, so only apply upper-triangular */
1729: n = a->rlen[i]-(diag[i]-shift)/8-1;
1730: for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1731: x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */
1732: }
1733: }
1734: if (xb == b) {
1735: PetscLogFlops(2.0*a->nz);
1736: } else {
1737: PetscLogFlops(a->nz); /* assumes 1/2 in upper */
1738: }
1739: }
1740: }
1741: VecRestoreArray(xx,&x);
1742: VecRestoreArrayRead(bb,&b);
1743: return(0);
1744: }
1746: /* -------------------------------------------------------------------*/
1747: static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL,
1748: MatGetRow_SeqSELL,
1749: MatRestoreRow_SeqSELL,
1750: MatMult_SeqSELL,
1751: /* 4*/ MatMultAdd_SeqSELL,
1752: MatMultTranspose_SeqSELL,
1753: MatMultTransposeAdd_SeqSELL,
1754: 0,
1755: 0,
1756: 0,
1757: /* 10*/ 0,
1758: 0,
1759: 0,
1760: MatSOR_SeqSELL,
1761: 0,
1762: /* 15*/ MatGetInfo_SeqSELL,
1763: MatEqual_SeqSELL,
1764: MatGetDiagonal_SeqSELL,
1765: MatDiagonalScale_SeqSELL,
1766: 0,
1767: /* 20*/ 0,
1768: MatAssemblyEnd_SeqSELL,
1769: MatSetOption_SeqSELL,
1770: MatZeroEntries_SeqSELL,
1771: /* 24*/ 0,
1772: 0,
1773: 0,
1774: 0,
1775: 0,
1776: /* 29*/ MatSetUp_SeqSELL,
1777: 0,
1778: 0,
1779: 0,
1780: 0,
1781: /* 34*/ MatDuplicate_SeqSELL,
1782: 0,
1783: 0,
1784: 0,
1785: 0,
1786: /* 39*/ 0,
1787: 0,
1788: 0,
1789: MatGetValues_SeqSELL,
1790: MatCopy_SeqSELL,
1791: /* 44*/ 0,
1792: MatScale_SeqSELL,
1793: MatShift_SeqSELL,
1794: 0,
1795: 0,
1796: /* 49*/ 0,
1797: 0,
1798: 0,
1799: 0,
1800: 0,
1801: /* 54*/ MatFDColoringCreate_SeqXAIJ,
1802: 0,
1803: 0,
1804: 0,
1805: 0,
1806: /* 59*/ 0,
1807: MatDestroy_SeqSELL,
1808: MatView_SeqSELL,
1809: 0,
1810: 0,
1811: /* 64*/ 0,
1812: 0,
1813: 0,
1814: 0,
1815: 0,
1816: /* 69*/ 0,
1817: 0,
1818: 0,
1819: 0,
1820: 0,
1821: /* 74*/ 0,
1822: MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */
1823: 0,
1824: 0,
1825: 0,
1826: /* 79*/ 0,
1827: 0,
1828: 0,
1829: 0,
1830: 0,
1831: /* 84*/ 0,
1832: 0,
1833: 0,
1834: 0,
1835: 0,
1836: /* 89*/ 0,
1837: 0,
1838: 0,
1839: 0,
1840: 0,
1841: /* 94*/ 0,
1842: 0,
1843: 0,
1844: 0,
1845: 0,
1846: /* 99*/ 0,
1847: 0,
1848: 0,
1849: MatConjugate_SeqSELL,
1850: 0,
1851: /*104*/ 0,
1852: 0,
1853: 0,
1854: 0,
1855: 0,
1856: /*109*/ 0,
1857: 0,
1858: 0,
1859: 0,
1860: MatMissingDiagonal_SeqSELL,
1861: /*114*/ 0,
1862: 0,
1863: 0,
1864: 0,
1865: 0,
1866: /*119*/ 0,
1867: 0,
1868: 0,
1869: 0,
1870: 0,
1871: /*124*/ 0,
1872: 0,
1873: 0,
1874: 0,
1875: 0,
1876: /*129*/ 0,
1877: 0,
1878: 0,
1879: 0,
1880: 0,
1881: /*134*/ 0,
1882: 0,
1883: 0,
1884: 0,
1885: 0,
1886: /*139*/ 0,
1887: 0,
1888: 0,
1889: MatFDColoringSetUp_SeqXAIJ,
1890: 0,
1891: /*144*/0
1892: };
1894: PetscErrorCode MatStoreValues_SeqSELL(Mat mat)
1895: {
1896: Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data;
1900: if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1902: /* allocate space for values if not already there */
1903: if (!a->saved_values) {
1904: PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);
1905: PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));
1906: }
1908: /* copy values over */
1909: PetscArraycpy(a->saved_values,a->val,a->sliidx[a->totalslices]);
1910: return(0);
1911: }
1913: PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat)
1914: {
1915: Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data;
1919: if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1920: if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1921: PetscArraycpy(a->val,a->saved_values,a->sliidx[a->totalslices]);
1922: return(0);
1923: }
1925: /*@C
1926: MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray()
1928: Not Collective
1930: Input Parameters:
1931: . mat - a MATSEQSELL matrix
1932: . array - pointer to the data
1934: Level: intermediate
1936: .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90()
1937: @*/
1938: PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array)
1939: {
1943: PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));
1944: return(0);
1945: }
1947: PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B)
1948: {
1949: Mat_SeqSELL *b;
1950: PetscMPIInt size;
1954: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
1955: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1");
1957: PetscNewLog(B,&b);
1959: B->data = (void*)b;
1961: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1963: b->row = 0;
1964: b->col = 0;
1965: b->icol = 0;
1966: b->reallocs = 0;
1967: b->ignorezeroentries = PETSC_FALSE;
1968: b->roworiented = PETSC_TRUE;
1969: b->nonew = 0;
1970: b->diag = 0;
1971: b->solve_work = 0;
1972: B->spptr = 0;
1973: b->saved_values = 0;
1974: b->idiag = 0;
1975: b->mdiag = 0;
1976: b->ssor_work = 0;
1977: b->omega = 1.0;
1978: b->fshift = 0.0;
1979: b->idiagvalid = PETSC_FALSE;
1980: b->keepnonzeropattern = PETSC_FALSE;
1982: PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);
1983: PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);
1984: PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);
1985: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);
1986: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);
1987: PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);
1988: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);
1989: return(0);
1990: }
1992: /*
1993: Given a matrix generated with MatGetFactor() duplicates all the information in A into B
1994: */
1995: PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
1996: {
1997: Mat_SeqSELL *c,*a=(Mat_SeqSELL*)A->data;
1998: PetscInt i,m=A->rmap->n;
1999: PetscInt totalslices=a->totalslices;
2003: c = (Mat_SeqSELL*)C->data;
2005: C->factortype = A->factortype;
2006: c->row = 0;
2007: c->col = 0;
2008: c->icol = 0;
2009: c->reallocs = 0;
2011: C->assembled = PETSC_TRUE;
2013: PetscLayoutReference(A->rmap,&C->rmap);
2014: PetscLayoutReference(A->cmap,&C->cmap);
2016: PetscMalloc1(8*totalslices,&c->rlen);
2017: PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));
2018: PetscMalloc1(totalslices+1,&c->sliidx);
2019: PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));
2021: for (i=0; i<m; i++) c->rlen[i] = a->rlen[i];
2022: for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i];
2024: /* allocate the matrix space */
2025: if (mallocmatspace) {
2026: PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);
2027: PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));
2029: c->singlemalloc = PETSC_TRUE;
2031: if (m > 0) {
2032: PetscArraycpy(c->colidx,a->colidx,a->maxallocmat);
2033: if (cpvalues == MAT_COPY_VALUES) {
2034: PetscArraycpy(c->val,a->val,a->maxallocmat);
2035: } else {
2036: PetscArrayzero(c->val,a->maxallocmat);
2037: }
2038: }
2039: }
2041: c->ignorezeroentries = a->ignorezeroentries;
2042: c->roworiented = a->roworiented;
2043: c->nonew = a->nonew;
2044: if (a->diag) {
2045: PetscMalloc1(m,&c->diag);
2046: PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));
2047: for (i=0; i<m; i++) {
2048: c->diag[i] = a->diag[i];
2049: }
2050: } else c->diag = 0;
2052: c->solve_work = 0;
2053: c->saved_values = 0;
2054: c->idiag = 0;
2055: c->ssor_work = 0;
2056: c->keepnonzeropattern = a->keepnonzeropattern;
2057: c->free_val = PETSC_TRUE;
2058: c->free_colidx = PETSC_TRUE;
2060: c->maxallocmat = a->maxallocmat;
2061: c->maxallocrow = a->maxallocrow;
2062: c->rlenmax = a->rlenmax;
2063: c->nz = a->nz;
2064: C->preallocated = PETSC_TRUE;
2066: c->nonzerorowcnt = a->nonzerorowcnt;
2067: C->nonzerostate = A->nonzerostate;
2069: PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
2070: return(0);
2071: }
2073: PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B)
2074: {
2078: MatCreate(PetscObjectComm((PetscObject)A),B);
2079: MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);
2080: if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) {
2081: MatSetBlockSizesFromMats(*B,A,A);
2082: }
2083: MatSetType(*B,((PetscObject)A)->type_name);
2084: MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);
2085: return(0);
2086: }
2088: /*@C
2089: MatCreateSeqSELL - Creates a sparse matrix in SELL format.
2091: Collective
2093: Input Parameters:
2094: + comm - MPI communicator, set to PETSC_COMM_SELF
2095: . m - number of rows
2096: . n - number of columns
2097: . rlenmax - maximum number of nonzeros in a row
2098: - rlen - array containing the number of nonzeros in the various rows
2099: (possibly different for each row) or NULL
2101: Output Parameter:
2102: . A - the matrix
2104: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2105: MatXXXXSetPreallocation() paradigm instead of this routine directly.
2106: [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation]
2108: Notes:
2109: If nnz is given then nz is ignored
2111: Specify the preallocated storage with either rlenmax or rlen (not both).
2112: Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory
2113: allocation. For large problems you MUST preallocate memory or you
2114: will get TERRIBLE performance, see the users' manual chapter on matrices.
2116: Level: intermediate
2118: .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays()
2120: @*/
2121: PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A)
2122: {
2126: MatCreate(comm,A);
2127: MatSetSizes(*A,m,n,m,n);
2128: MatSetType(*A,MATSEQSELL);
2129: MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);
2130: return(0);
2131: }
2133: PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg)
2134: {
2135: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data;
2136: PetscInt totalslices=a->totalslices;
2140: /* If the matrix dimensions are not equal,or no of nonzeros */
2141: if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) {
2142: *flg = PETSC_FALSE;
2143: return(0);
2144: }
2145: /* if the a->colidx are the same */
2146: PetscArraycmp(a->colidx,b->colidx,a->sliidx[totalslices],flg);
2147: if (!*flg) return(0);
2148: /* if a->val are the same */
2149: PetscArraycmp(a->val,b->val,a->sliidx[totalslices],flg);
2150: return(0);
2151: }
2153: PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A)
2154: {
2155: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2158: a->idiagvalid = PETSC_FALSE;
2159: return(0);
2160: }
2162: PetscErrorCode MatConjugate_SeqSELL(Mat A)
2163: {
2164: #if defined(PETSC_USE_COMPLEX)
2165: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2166: PetscInt i;
2167: PetscScalar *val = a->val;
2170: for (i=0; i<a->sliidx[a->totalslices]; i++) {
2171: val[i] = PetscConj(val[i]);
2172: }
2173: #else
2175: #endif
2176: return(0);
2177: }