Actual source code: maij.c
2: /*
3: Defines the basic matrix operations for the MAIJ matrix storage format.
4: This format is used for restriction and interpolation operations for
5: multicomponent problems. It interpolates each component the same way
6: independently.
8: We provide:
9: MatMult()
10: MatMultTranspose()
11: MatMultTransposeAdd()
12: MatMultAdd()
13: and
14: MatCreateMAIJ(Mat,dof,Mat*)
16: This single directory handles both the sequential and parallel codes
17: */
19: #include <../src/mat/impls/maij/maij.h>
20: #include <../src/mat/utils/freespace.h>
22: /*@
23: MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
25: Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
27: Input Parameter:
28: . A - the MAIJ matrix
30: Output Parameter:
31: . B - the AIJ matrix
33: Level: advanced
35: Notes:
36: The reference count on the AIJ matrix is not increased so you should not destroy it.
38: .seealso: MatCreateMAIJ()
39: @*/
40: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
41: {
42: PetscBool ismpimaij,isseqmaij;
44: PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
45: PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
46: if (ismpimaij) {
47: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
49: *B = b->A;
50: } else if (isseqmaij) {
51: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
53: *B = b->AIJ;
54: } else {
55: *B = A;
56: }
57: return 0;
58: }
60: /*@
61: MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
63: Logically Collective
65: Input Parameters:
66: + A - the MAIJ matrix
67: - dof - the block size for the new matrix
69: Output Parameter:
70: . B - the new MAIJ matrix
72: Level: advanced
74: .seealso: MatCreateMAIJ()
75: @*/
76: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
77: {
78: Mat Aij = NULL;
81: MatMAIJGetAIJ(A,&Aij);
82: MatCreateMAIJ(Aij,dof,B);
83: return 0;
84: }
86: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
87: {
88: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
90: MatDestroy(&b->AIJ);
91: PetscFree(A->data);
92: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);
93: PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL);
94: return 0;
95: }
97: PetscErrorCode MatSetUp_MAIJ(Mat A)
98: {
99: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
100: }
102: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
103: {
104: Mat B;
106: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
107: MatView(B,viewer);
108: MatDestroy(&B);
109: return 0;
110: }
112: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
113: {
114: Mat B;
116: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
117: MatView(B,viewer);
118: MatDestroy(&B);
119: return 0;
120: }
122: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
123: {
124: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
126: MatDestroy(&b->AIJ);
127: MatDestroy(&b->OAIJ);
128: MatDestroy(&b->A);
129: VecScatterDestroy(&b->ctx);
130: VecDestroy(&b->w);
131: PetscFree(A->data);
132: PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);
133: PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL);
134: PetscObjectChangeTypeName((PetscObject)A,NULL);
135: return 0;
136: }
138: /*MC
139: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
140: multicomponent problems, interpolating or restricting each component the same way independently.
141: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
143: Operations provided:
144: .vb
145: MatMult
146: MatMultTranspose
147: MatMultAdd
148: MatMultTransposeAdd
149: .ve
151: Level: advanced
153: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
154: M*/
156: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
157: {
158: Mat_MPIMAIJ *b;
159: PetscMPIInt size;
161: PetscNewLog(A,&b);
162: A->data = (void*)b;
164: PetscMemzero(A->ops,sizeof(struct _MatOps));
166: A->ops->setup = MatSetUp_MAIJ;
168: b->AIJ = NULL;
169: b->dof = 0;
170: b->OAIJ = NULL;
171: b->ctx = NULL;
172: b->w = NULL;
173: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
174: if (size == 1) {
175: PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
176: } else {
177: PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
178: }
179: A->preallocated = PETSC_TRUE;
180: A->assembled = PETSC_TRUE;
181: return 0;
182: }
184: /* --------------------------------------------------------------------------------------*/
185: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
186: {
187: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
188: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
189: const PetscScalar *x,*v;
190: PetscScalar *y, sum1, sum2;
191: PetscInt nonzerorow=0,n,i,jrow,j;
192: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
194: VecGetArrayRead(xx,&x);
195: VecGetArray(yy,&y);
196: idx = a->j;
197: v = a->a;
198: ii = a->i;
200: for (i=0; i<m; i++) {
201: jrow = ii[i];
202: n = ii[i+1] - jrow;
203: sum1 = 0.0;
204: sum2 = 0.0;
206: nonzerorow += (n>0);
207: for (j=0; j<n; j++) {
208: sum1 += v[jrow]*x[2*idx[jrow]];
209: sum2 += v[jrow]*x[2*idx[jrow]+1];
210: jrow++;
211: }
212: y[2*i] = sum1;
213: y[2*i+1] = sum2;
214: }
216: PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
217: VecRestoreArrayRead(xx,&x);
218: VecRestoreArray(yy,&y);
219: return 0;
220: }
222: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
223: {
224: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
225: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
226: const PetscScalar *x,*v;
227: PetscScalar *y,alpha1,alpha2;
228: PetscInt n,i;
229: const PetscInt m = b->AIJ->rmap->n,*idx;
231: VecSet(yy,0.0);
232: VecGetArrayRead(xx,&x);
233: VecGetArray(yy,&y);
235: for (i=0; i<m; i++) {
236: idx = a->j + a->i[i];
237: v = a->a + a->i[i];
238: n = a->i[i+1] - a->i[i];
239: alpha1 = x[2*i];
240: alpha2 = x[2*i+1];
241: while (n-->0) {
242: y[2*(*idx)] += alpha1*(*v);
243: y[2*(*idx)+1] += alpha2*(*v);
244: idx++; v++;
245: }
246: }
247: PetscLogFlops(4.0*a->nz);
248: VecRestoreArrayRead(xx,&x);
249: VecRestoreArray(yy,&y);
250: return 0;
251: }
253: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
254: {
255: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
256: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
257: const PetscScalar *x,*v;
258: PetscScalar *y,sum1, sum2;
259: PetscInt n,i,jrow,j;
260: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
262: if (yy != zz) VecCopy(yy,zz);
263: VecGetArrayRead(xx,&x);
264: VecGetArray(zz,&y);
265: idx = a->j;
266: v = a->a;
267: ii = a->i;
269: for (i=0; i<m; i++) {
270: jrow = ii[i];
271: n = ii[i+1] - jrow;
272: sum1 = 0.0;
273: sum2 = 0.0;
274: for (j=0; j<n; j++) {
275: sum1 += v[jrow]*x[2*idx[jrow]];
276: sum2 += v[jrow]*x[2*idx[jrow]+1];
277: jrow++;
278: }
279: y[2*i] += sum1;
280: y[2*i+1] += sum2;
281: }
283: PetscLogFlops(4.0*a->nz);
284: VecRestoreArrayRead(xx,&x);
285: VecRestoreArray(zz,&y);
286: return 0;
287: }
288: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
289: {
290: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
291: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
292: const PetscScalar *x,*v;
293: PetscScalar *y,alpha1,alpha2;
294: PetscInt n,i;
295: const PetscInt m = b->AIJ->rmap->n,*idx;
297: if (yy != zz) VecCopy(yy,zz);
298: VecGetArrayRead(xx,&x);
299: VecGetArray(zz,&y);
301: for (i=0; i<m; i++) {
302: idx = a->j + a->i[i];
303: v = a->a + a->i[i];
304: n = a->i[i+1] - a->i[i];
305: alpha1 = x[2*i];
306: alpha2 = x[2*i+1];
307: while (n-->0) {
308: y[2*(*idx)] += alpha1*(*v);
309: y[2*(*idx)+1] += alpha2*(*v);
310: idx++; v++;
311: }
312: }
313: PetscLogFlops(4.0*a->nz);
314: VecRestoreArrayRead(xx,&x);
315: VecRestoreArray(zz,&y);
316: return 0;
317: }
318: /* --------------------------------------------------------------------------------------*/
319: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
320: {
321: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
322: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
323: const PetscScalar *x,*v;
324: PetscScalar *y,sum1, sum2, sum3;
325: PetscInt nonzerorow=0,n,i,jrow,j;
326: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
328: VecGetArrayRead(xx,&x);
329: VecGetArray(yy,&y);
330: idx = a->j;
331: v = a->a;
332: ii = a->i;
334: for (i=0; i<m; i++) {
335: jrow = ii[i];
336: n = ii[i+1] - jrow;
337: sum1 = 0.0;
338: sum2 = 0.0;
339: sum3 = 0.0;
341: nonzerorow += (n>0);
342: for (j=0; j<n; j++) {
343: sum1 += v[jrow]*x[3*idx[jrow]];
344: sum2 += v[jrow]*x[3*idx[jrow]+1];
345: sum3 += v[jrow]*x[3*idx[jrow]+2];
346: jrow++;
347: }
348: y[3*i] = sum1;
349: y[3*i+1] = sum2;
350: y[3*i+2] = sum3;
351: }
353: PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
354: VecRestoreArrayRead(xx,&x);
355: VecRestoreArray(yy,&y);
356: return 0;
357: }
359: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
360: {
361: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
362: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
363: const PetscScalar *x,*v;
364: PetscScalar *y,alpha1,alpha2,alpha3;
365: PetscInt n,i;
366: const PetscInt m = b->AIJ->rmap->n,*idx;
368: VecSet(yy,0.0);
369: VecGetArrayRead(xx,&x);
370: VecGetArray(yy,&y);
372: for (i=0; i<m; i++) {
373: idx = a->j + a->i[i];
374: v = a->a + a->i[i];
375: n = a->i[i+1] - a->i[i];
376: alpha1 = x[3*i];
377: alpha2 = x[3*i+1];
378: alpha3 = x[3*i+2];
379: while (n-->0) {
380: y[3*(*idx)] += alpha1*(*v);
381: y[3*(*idx)+1] += alpha2*(*v);
382: y[3*(*idx)+2] += alpha3*(*v);
383: idx++; v++;
384: }
385: }
386: PetscLogFlops(6.0*a->nz);
387: VecRestoreArrayRead(xx,&x);
388: VecRestoreArray(yy,&y);
389: return 0;
390: }
392: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
393: {
394: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
395: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
396: const PetscScalar *x,*v;
397: PetscScalar *y,sum1, sum2, sum3;
398: PetscInt n,i,jrow,j;
399: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
401: if (yy != zz) VecCopy(yy,zz);
402: VecGetArrayRead(xx,&x);
403: VecGetArray(zz,&y);
404: idx = a->j;
405: v = a->a;
406: ii = a->i;
408: for (i=0; i<m; i++) {
409: jrow = ii[i];
410: n = ii[i+1] - jrow;
411: sum1 = 0.0;
412: sum2 = 0.0;
413: sum3 = 0.0;
414: for (j=0; j<n; j++) {
415: sum1 += v[jrow]*x[3*idx[jrow]];
416: sum2 += v[jrow]*x[3*idx[jrow]+1];
417: sum3 += v[jrow]*x[3*idx[jrow]+2];
418: jrow++;
419: }
420: y[3*i] += sum1;
421: y[3*i+1] += sum2;
422: y[3*i+2] += sum3;
423: }
425: PetscLogFlops(6.0*a->nz);
426: VecRestoreArrayRead(xx,&x);
427: VecRestoreArray(zz,&y);
428: return 0;
429: }
430: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
431: {
432: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
433: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
434: const PetscScalar *x,*v;
435: PetscScalar *y,alpha1,alpha2,alpha3;
436: PetscInt n,i;
437: const PetscInt m = b->AIJ->rmap->n,*idx;
439: if (yy != zz) VecCopy(yy,zz);
440: VecGetArrayRead(xx,&x);
441: VecGetArray(zz,&y);
442: for (i=0; i<m; i++) {
443: idx = a->j + a->i[i];
444: v = a->a + a->i[i];
445: n = a->i[i+1] - a->i[i];
446: alpha1 = x[3*i];
447: alpha2 = x[3*i+1];
448: alpha3 = x[3*i+2];
449: while (n-->0) {
450: y[3*(*idx)] += alpha1*(*v);
451: y[3*(*idx)+1] += alpha2*(*v);
452: y[3*(*idx)+2] += alpha3*(*v);
453: idx++; v++;
454: }
455: }
456: PetscLogFlops(6.0*a->nz);
457: VecRestoreArrayRead(xx,&x);
458: VecRestoreArray(zz,&y);
459: return 0;
460: }
462: /* ------------------------------------------------------------------------------*/
463: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
464: {
465: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
466: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
467: const PetscScalar *x,*v;
468: PetscScalar *y,sum1, sum2, sum3, sum4;
469: PetscInt nonzerorow=0,n,i,jrow,j;
470: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
472: VecGetArrayRead(xx,&x);
473: VecGetArray(yy,&y);
474: idx = a->j;
475: v = a->a;
476: ii = a->i;
478: for (i=0; i<m; i++) {
479: jrow = ii[i];
480: n = ii[i+1] - jrow;
481: sum1 = 0.0;
482: sum2 = 0.0;
483: sum3 = 0.0;
484: sum4 = 0.0;
485: nonzerorow += (n>0);
486: for (j=0; j<n; j++) {
487: sum1 += v[jrow]*x[4*idx[jrow]];
488: sum2 += v[jrow]*x[4*idx[jrow]+1];
489: sum3 += v[jrow]*x[4*idx[jrow]+2];
490: sum4 += v[jrow]*x[4*idx[jrow]+3];
491: jrow++;
492: }
493: y[4*i] = sum1;
494: y[4*i+1] = sum2;
495: y[4*i+2] = sum3;
496: y[4*i+3] = sum4;
497: }
499: PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
500: VecRestoreArrayRead(xx,&x);
501: VecRestoreArray(yy,&y);
502: return 0;
503: }
505: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
506: {
507: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
508: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
509: const PetscScalar *x,*v;
510: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
511: PetscInt n,i;
512: const PetscInt m = b->AIJ->rmap->n,*idx;
514: VecSet(yy,0.0);
515: VecGetArrayRead(xx,&x);
516: VecGetArray(yy,&y);
517: for (i=0; i<m; i++) {
518: idx = a->j + a->i[i];
519: v = a->a + a->i[i];
520: n = a->i[i+1] - a->i[i];
521: alpha1 = x[4*i];
522: alpha2 = x[4*i+1];
523: alpha3 = x[4*i+2];
524: alpha4 = x[4*i+3];
525: while (n-->0) {
526: y[4*(*idx)] += alpha1*(*v);
527: y[4*(*idx)+1] += alpha2*(*v);
528: y[4*(*idx)+2] += alpha3*(*v);
529: y[4*(*idx)+3] += alpha4*(*v);
530: idx++; v++;
531: }
532: }
533: PetscLogFlops(8.0*a->nz);
534: VecRestoreArrayRead(xx,&x);
535: VecRestoreArray(yy,&y);
536: return 0;
537: }
539: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
540: {
541: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
542: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
543: const PetscScalar *x,*v;
544: PetscScalar *y,sum1, sum2, sum3, sum4;
545: PetscInt n,i,jrow,j;
546: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
548: if (yy != zz) VecCopy(yy,zz);
549: VecGetArrayRead(xx,&x);
550: VecGetArray(zz,&y);
551: idx = a->j;
552: v = a->a;
553: ii = a->i;
555: for (i=0; i<m; i++) {
556: jrow = ii[i];
557: n = ii[i+1] - jrow;
558: sum1 = 0.0;
559: sum2 = 0.0;
560: sum3 = 0.0;
561: sum4 = 0.0;
562: for (j=0; j<n; j++) {
563: sum1 += v[jrow]*x[4*idx[jrow]];
564: sum2 += v[jrow]*x[4*idx[jrow]+1];
565: sum3 += v[jrow]*x[4*idx[jrow]+2];
566: sum4 += v[jrow]*x[4*idx[jrow]+3];
567: jrow++;
568: }
569: y[4*i] += sum1;
570: y[4*i+1] += sum2;
571: y[4*i+2] += sum3;
572: y[4*i+3] += sum4;
573: }
575: PetscLogFlops(8.0*a->nz);
576: VecRestoreArrayRead(xx,&x);
577: VecRestoreArray(zz,&y);
578: return 0;
579: }
580: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
581: {
582: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
583: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
584: const PetscScalar *x,*v;
585: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
586: PetscInt n,i;
587: const PetscInt m = b->AIJ->rmap->n,*idx;
589: if (yy != zz) VecCopy(yy,zz);
590: VecGetArrayRead(xx,&x);
591: VecGetArray(zz,&y);
593: for (i=0; i<m; i++) {
594: idx = a->j + a->i[i];
595: v = a->a + a->i[i];
596: n = a->i[i+1] - a->i[i];
597: alpha1 = x[4*i];
598: alpha2 = x[4*i+1];
599: alpha3 = x[4*i+2];
600: alpha4 = x[4*i+3];
601: while (n-->0) {
602: y[4*(*idx)] += alpha1*(*v);
603: y[4*(*idx)+1] += alpha2*(*v);
604: y[4*(*idx)+2] += alpha3*(*v);
605: y[4*(*idx)+3] += alpha4*(*v);
606: idx++; v++;
607: }
608: }
609: PetscLogFlops(8.0*a->nz);
610: VecRestoreArrayRead(xx,&x);
611: VecRestoreArray(zz,&y);
612: return 0;
613: }
614: /* ------------------------------------------------------------------------------*/
616: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
617: {
618: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
619: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
620: const PetscScalar *x,*v;
621: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
622: PetscInt nonzerorow=0,n,i,jrow,j;
623: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
625: VecGetArrayRead(xx,&x);
626: VecGetArray(yy,&y);
627: idx = a->j;
628: v = a->a;
629: ii = a->i;
631: for (i=0; i<m; i++) {
632: jrow = ii[i];
633: n = ii[i+1] - jrow;
634: sum1 = 0.0;
635: sum2 = 0.0;
636: sum3 = 0.0;
637: sum4 = 0.0;
638: sum5 = 0.0;
640: nonzerorow += (n>0);
641: for (j=0; j<n; j++) {
642: sum1 += v[jrow]*x[5*idx[jrow]];
643: sum2 += v[jrow]*x[5*idx[jrow]+1];
644: sum3 += v[jrow]*x[5*idx[jrow]+2];
645: sum4 += v[jrow]*x[5*idx[jrow]+3];
646: sum5 += v[jrow]*x[5*idx[jrow]+4];
647: jrow++;
648: }
649: y[5*i] = sum1;
650: y[5*i+1] = sum2;
651: y[5*i+2] = sum3;
652: y[5*i+3] = sum4;
653: y[5*i+4] = sum5;
654: }
656: PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
657: VecRestoreArrayRead(xx,&x);
658: VecRestoreArray(yy,&y);
659: return 0;
660: }
662: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
663: {
664: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
665: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
666: const PetscScalar *x,*v;
667: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
668: PetscInt n,i;
669: const PetscInt m = b->AIJ->rmap->n,*idx;
671: VecSet(yy,0.0);
672: VecGetArrayRead(xx,&x);
673: VecGetArray(yy,&y);
675: for (i=0; i<m; i++) {
676: idx = a->j + a->i[i];
677: v = a->a + a->i[i];
678: n = a->i[i+1] - a->i[i];
679: alpha1 = x[5*i];
680: alpha2 = x[5*i+1];
681: alpha3 = x[5*i+2];
682: alpha4 = x[5*i+3];
683: alpha5 = x[5*i+4];
684: while (n-->0) {
685: y[5*(*idx)] += alpha1*(*v);
686: y[5*(*idx)+1] += alpha2*(*v);
687: y[5*(*idx)+2] += alpha3*(*v);
688: y[5*(*idx)+3] += alpha4*(*v);
689: y[5*(*idx)+4] += alpha5*(*v);
690: idx++; v++;
691: }
692: }
693: PetscLogFlops(10.0*a->nz);
694: VecRestoreArrayRead(xx,&x);
695: VecRestoreArray(yy,&y);
696: return 0;
697: }
699: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
700: {
701: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
702: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
703: const PetscScalar *x,*v;
704: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
705: PetscInt n,i,jrow,j;
706: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
708: if (yy != zz) VecCopy(yy,zz);
709: VecGetArrayRead(xx,&x);
710: VecGetArray(zz,&y);
711: idx = a->j;
712: v = a->a;
713: ii = a->i;
715: for (i=0; i<m; i++) {
716: jrow = ii[i];
717: n = ii[i+1] - jrow;
718: sum1 = 0.0;
719: sum2 = 0.0;
720: sum3 = 0.0;
721: sum4 = 0.0;
722: sum5 = 0.0;
723: for (j=0; j<n; j++) {
724: sum1 += v[jrow]*x[5*idx[jrow]];
725: sum2 += v[jrow]*x[5*idx[jrow]+1];
726: sum3 += v[jrow]*x[5*idx[jrow]+2];
727: sum4 += v[jrow]*x[5*idx[jrow]+3];
728: sum5 += v[jrow]*x[5*idx[jrow]+4];
729: jrow++;
730: }
731: y[5*i] += sum1;
732: y[5*i+1] += sum2;
733: y[5*i+2] += sum3;
734: y[5*i+3] += sum4;
735: y[5*i+4] += sum5;
736: }
738: PetscLogFlops(10.0*a->nz);
739: VecRestoreArrayRead(xx,&x);
740: VecRestoreArray(zz,&y);
741: return 0;
742: }
744: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
745: {
746: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
747: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
748: const PetscScalar *x,*v;
749: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
750: PetscInt n,i;
751: const PetscInt m = b->AIJ->rmap->n,*idx;
753: if (yy != zz) VecCopy(yy,zz);
754: VecGetArrayRead(xx,&x);
755: VecGetArray(zz,&y);
757: for (i=0; i<m; i++) {
758: idx = a->j + a->i[i];
759: v = a->a + a->i[i];
760: n = a->i[i+1] - a->i[i];
761: alpha1 = x[5*i];
762: alpha2 = x[5*i+1];
763: alpha3 = x[5*i+2];
764: alpha4 = x[5*i+3];
765: alpha5 = x[5*i+4];
766: while (n-->0) {
767: y[5*(*idx)] += alpha1*(*v);
768: y[5*(*idx)+1] += alpha2*(*v);
769: y[5*(*idx)+2] += alpha3*(*v);
770: y[5*(*idx)+3] += alpha4*(*v);
771: y[5*(*idx)+4] += alpha5*(*v);
772: idx++; v++;
773: }
774: }
775: PetscLogFlops(10.0*a->nz);
776: VecRestoreArrayRead(xx,&x);
777: VecRestoreArray(zz,&y);
778: return 0;
779: }
781: /* ------------------------------------------------------------------------------*/
782: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
783: {
784: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
785: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
786: const PetscScalar *x,*v;
787: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
788: PetscInt nonzerorow=0,n,i,jrow,j;
789: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
791: VecGetArrayRead(xx,&x);
792: VecGetArray(yy,&y);
793: idx = a->j;
794: v = a->a;
795: ii = a->i;
797: for (i=0; i<m; i++) {
798: jrow = ii[i];
799: n = ii[i+1] - jrow;
800: sum1 = 0.0;
801: sum2 = 0.0;
802: sum3 = 0.0;
803: sum4 = 0.0;
804: sum5 = 0.0;
805: sum6 = 0.0;
807: nonzerorow += (n>0);
808: for (j=0; j<n; j++) {
809: sum1 += v[jrow]*x[6*idx[jrow]];
810: sum2 += v[jrow]*x[6*idx[jrow]+1];
811: sum3 += v[jrow]*x[6*idx[jrow]+2];
812: sum4 += v[jrow]*x[6*idx[jrow]+3];
813: sum5 += v[jrow]*x[6*idx[jrow]+4];
814: sum6 += v[jrow]*x[6*idx[jrow]+5];
815: jrow++;
816: }
817: y[6*i] = sum1;
818: y[6*i+1] = sum2;
819: y[6*i+2] = sum3;
820: y[6*i+3] = sum4;
821: y[6*i+4] = sum5;
822: y[6*i+5] = sum6;
823: }
825: PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
826: VecRestoreArrayRead(xx,&x);
827: VecRestoreArray(yy,&y);
828: return 0;
829: }
831: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
832: {
833: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
834: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
835: const PetscScalar *x,*v;
836: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
837: PetscInt n,i;
838: const PetscInt m = b->AIJ->rmap->n,*idx;
840: VecSet(yy,0.0);
841: VecGetArrayRead(xx,&x);
842: VecGetArray(yy,&y);
844: for (i=0; i<m; i++) {
845: idx = a->j + a->i[i];
846: v = a->a + a->i[i];
847: n = a->i[i+1] - a->i[i];
848: alpha1 = x[6*i];
849: alpha2 = x[6*i+1];
850: alpha3 = x[6*i+2];
851: alpha4 = x[6*i+3];
852: alpha5 = x[6*i+4];
853: alpha6 = x[6*i+5];
854: while (n-->0) {
855: y[6*(*idx)] += alpha1*(*v);
856: y[6*(*idx)+1] += alpha2*(*v);
857: y[6*(*idx)+2] += alpha3*(*v);
858: y[6*(*idx)+3] += alpha4*(*v);
859: y[6*(*idx)+4] += alpha5*(*v);
860: y[6*(*idx)+5] += alpha6*(*v);
861: idx++; v++;
862: }
863: }
864: PetscLogFlops(12.0*a->nz);
865: VecRestoreArrayRead(xx,&x);
866: VecRestoreArray(yy,&y);
867: return 0;
868: }
870: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
871: {
872: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
873: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
874: const PetscScalar *x,*v;
875: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
876: PetscInt n,i,jrow,j;
877: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
879: if (yy != zz) VecCopy(yy,zz);
880: VecGetArrayRead(xx,&x);
881: VecGetArray(zz,&y);
882: idx = a->j;
883: v = a->a;
884: ii = a->i;
886: for (i=0; i<m; i++) {
887: jrow = ii[i];
888: n = ii[i+1] - jrow;
889: sum1 = 0.0;
890: sum2 = 0.0;
891: sum3 = 0.0;
892: sum4 = 0.0;
893: sum5 = 0.0;
894: sum6 = 0.0;
895: for (j=0; j<n; j++) {
896: sum1 += v[jrow]*x[6*idx[jrow]];
897: sum2 += v[jrow]*x[6*idx[jrow]+1];
898: sum3 += v[jrow]*x[6*idx[jrow]+2];
899: sum4 += v[jrow]*x[6*idx[jrow]+3];
900: sum5 += v[jrow]*x[6*idx[jrow]+4];
901: sum6 += v[jrow]*x[6*idx[jrow]+5];
902: jrow++;
903: }
904: y[6*i] += sum1;
905: y[6*i+1] += sum2;
906: y[6*i+2] += sum3;
907: y[6*i+3] += sum4;
908: y[6*i+4] += sum5;
909: y[6*i+5] += sum6;
910: }
912: PetscLogFlops(12.0*a->nz);
913: VecRestoreArrayRead(xx,&x);
914: VecRestoreArray(zz,&y);
915: return 0;
916: }
918: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
919: {
920: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
921: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
922: const PetscScalar *x,*v;
923: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
924: PetscInt n,i;
925: const PetscInt m = b->AIJ->rmap->n,*idx;
927: if (yy != zz) VecCopy(yy,zz);
928: VecGetArrayRead(xx,&x);
929: VecGetArray(zz,&y);
931: for (i=0; i<m; i++) {
932: idx = a->j + a->i[i];
933: v = a->a + a->i[i];
934: n = a->i[i+1] - a->i[i];
935: alpha1 = x[6*i];
936: alpha2 = x[6*i+1];
937: alpha3 = x[6*i+2];
938: alpha4 = x[6*i+3];
939: alpha5 = x[6*i+4];
940: alpha6 = x[6*i+5];
941: while (n-->0) {
942: y[6*(*idx)] += alpha1*(*v);
943: y[6*(*idx)+1] += alpha2*(*v);
944: y[6*(*idx)+2] += alpha3*(*v);
945: y[6*(*idx)+3] += alpha4*(*v);
946: y[6*(*idx)+4] += alpha5*(*v);
947: y[6*(*idx)+5] += alpha6*(*v);
948: idx++; v++;
949: }
950: }
951: PetscLogFlops(12.0*a->nz);
952: VecRestoreArrayRead(xx,&x);
953: VecRestoreArray(zz,&y);
954: return 0;
955: }
957: /* ------------------------------------------------------------------------------*/
958: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
959: {
960: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
961: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
962: const PetscScalar *x,*v;
963: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
964: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
965: PetscInt nonzerorow=0,n,i,jrow,j;
967: VecGetArrayRead(xx,&x);
968: VecGetArray(yy,&y);
969: idx = a->j;
970: v = a->a;
971: ii = a->i;
973: for (i=0; i<m; i++) {
974: jrow = ii[i];
975: n = ii[i+1] - jrow;
976: sum1 = 0.0;
977: sum2 = 0.0;
978: sum3 = 0.0;
979: sum4 = 0.0;
980: sum5 = 0.0;
981: sum6 = 0.0;
982: sum7 = 0.0;
984: nonzerorow += (n>0);
985: for (j=0; j<n; j++) {
986: sum1 += v[jrow]*x[7*idx[jrow]];
987: sum2 += v[jrow]*x[7*idx[jrow]+1];
988: sum3 += v[jrow]*x[7*idx[jrow]+2];
989: sum4 += v[jrow]*x[7*idx[jrow]+3];
990: sum5 += v[jrow]*x[7*idx[jrow]+4];
991: sum6 += v[jrow]*x[7*idx[jrow]+5];
992: sum7 += v[jrow]*x[7*idx[jrow]+6];
993: jrow++;
994: }
995: y[7*i] = sum1;
996: y[7*i+1] = sum2;
997: y[7*i+2] = sum3;
998: y[7*i+3] = sum4;
999: y[7*i+4] = sum5;
1000: y[7*i+5] = sum6;
1001: y[7*i+6] = sum7;
1002: }
1004: PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1005: VecRestoreArrayRead(xx,&x);
1006: VecRestoreArray(yy,&y);
1007: return 0;
1008: }
1010: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1011: {
1012: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1013: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1014: const PetscScalar *x,*v;
1015: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1016: const PetscInt m = b->AIJ->rmap->n,*idx;
1017: PetscInt n,i;
1019: VecSet(yy,0.0);
1020: VecGetArrayRead(xx,&x);
1021: VecGetArray(yy,&y);
1023: for (i=0; i<m; i++) {
1024: idx = a->j + a->i[i];
1025: v = a->a + a->i[i];
1026: n = a->i[i+1] - a->i[i];
1027: alpha1 = x[7*i];
1028: alpha2 = x[7*i+1];
1029: alpha3 = x[7*i+2];
1030: alpha4 = x[7*i+3];
1031: alpha5 = x[7*i+4];
1032: alpha6 = x[7*i+5];
1033: alpha7 = x[7*i+6];
1034: while (n-->0) {
1035: y[7*(*idx)] += alpha1*(*v);
1036: y[7*(*idx)+1] += alpha2*(*v);
1037: y[7*(*idx)+2] += alpha3*(*v);
1038: y[7*(*idx)+3] += alpha4*(*v);
1039: y[7*(*idx)+4] += alpha5*(*v);
1040: y[7*(*idx)+5] += alpha6*(*v);
1041: y[7*(*idx)+6] += alpha7*(*v);
1042: idx++; v++;
1043: }
1044: }
1045: PetscLogFlops(14.0*a->nz);
1046: VecRestoreArrayRead(xx,&x);
1047: VecRestoreArray(yy,&y);
1048: return 0;
1049: }
1051: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1052: {
1053: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1054: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1055: const PetscScalar *x,*v;
1056: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1057: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1058: PetscInt n,i,jrow,j;
1060: if (yy != zz) VecCopy(yy,zz);
1061: VecGetArrayRead(xx,&x);
1062: VecGetArray(zz,&y);
1063: idx = a->j;
1064: v = a->a;
1065: ii = a->i;
1067: for (i=0; i<m; i++) {
1068: jrow = ii[i];
1069: n = ii[i+1] - jrow;
1070: sum1 = 0.0;
1071: sum2 = 0.0;
1072: sum3 = 0.0;
1073: sum4 = 0.0;
1074: sum5 = 0.0;
1075: sum6 = 0.0;
1076: sum7 = 0.0;
1077: for (j=0; j<n; j++) {
1078: sum1 += v[jrow]*x[7*idx[jrow]];
1079: sum2 += v[jrow]*x[7*idx[jrow]+1];
1080: sum3 += v[jrow]*x[7*idx[jrow]+2];
1081: sum4 += v[jrow]*x[7*idx[jrow]+3];
1082: sum5 += v[jrow]*x[7*idx[jrow]+4];
1083: sum6 += v[jrow]*x[7*idx[jrow]+5];
1084: sum7 += v[jrow]*x[7*idx[jrow]+6];
1085: jrow++;
1086: }
1087: y[7*i] += sum1;
1088: y[7*i+1] += sum2;
1089: y[7*i+2] += sum3;
1090: y[7*i+3] += sum4;
1091: y[7*i+4] += sum5;
1092: y[7*i+5] += sum6;
1093: y[7*i+6] += sum7;
1094: }
1096: PetscLogFlops(14.0*a->nz);
1097: VecRestoreArrayRead(xx,&x);
1098: VecRestoreArray(zz,&y);
1099: return 0;
1100: }
1102: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1103: {
1104: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1105: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1106: const PetscScalar *x,*v;
1107: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1108: const PetscInt m = b->AIJ->rmap->n,*idx;
1109: PetscInt n,i;
1111: if (yy != zz) VecCopy(yy,zz);
1112: VecGetArrayRead(xx,&x);
1113: VecGetArray(zz,&y);
1114: for (i=0; i<m; i++) {
1115: idx = a->j + a->i[i];
1116: v = a->a + a->i[i];
1117: n = a->i[i+1] - a->i[i];
1118: alpha1 = x[7*i];
1119: alpha2 = x[7*i+1];
1120: alpha3 = x[7*i+2];
1121: alpha4 = x[7*i+3];
1122: alpha5 = x[7*i+4];
1123: alpha6 = x[7*i+5];
1124: alpha7 = x[7*i+6];
1125: while (n-->0) {
1126: y[7*(*idx)] += alpha1*(*v);
1127: y[7*(*idx)+1] += alpha2*(*v);
1128: y[7*(*idx)+2] += alpha3*(*v);
1129: y[7*(*idx)+3] += alpha4*(*v);
1130: y[7*(*idx)+4] += alpha5*(*v);
1131: y[7*(*idx)+5] += alpha6*(*v);
1132: y[7*(*idx)+6] += alpha7*(*v);
1133: idx++; v++;
1134: }
1135: }
1136: PetscLogFlops(14.0*a->nz);
1137: VecRestoreArrayRead(xx,&x);
1138: VecRestoreArray(zz,&y);
1139: return 0;
1140: }
1142: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1143: {
1144: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1145: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1146: const PetscScalar *x,*v;
1147: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1148: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1149: PetscInt nonzerorow=0,n,i,jrow,j;
1151: VecGetArrayRead(xx,&x);
1152: VecGetArray(yy,&y);
1153: idx = a->j;
1154: v = a->a;
1155: ii = a->i;
1157: for (i=0; i<m; i++) {
1158: jrow = ii[i];
1159: n = ii[i+1] - jrow;
1160: sum1 = 0.0;
1161: sum2 = 0.0;
1162: sum3 = 0.0;
1163: sum4 = 0.0;
1164: sum5 = 0.0;
1165: sum6 = 0.0;
1166: sum7 = 0.0;
1167: sum8 = 0.0;
1169: nonzerorow += (n>0);
1170: for (j=0; j<n; j++) {
1171: sum1 += v[jrow]*x[8*idx[jrow]];
1172: sum2 += v[jrow]*x[8*idx[jrow]+1];
1173: sum3 += v[jrow]*x[8*idx[jrow]+2];
1174: sum4 += v[jrow]*x[8*idx[jrow]+3];
1175: sum5 += v[jrow]*x[8*idx[jrow]+4];
1176: sum6 += v[jrow]*x[8*idx[jrow]+5];
1177: sum7 += v[jrow]*x[8*idx[jrow]+6];
1178: sum8 += v[jrow]*x[8*idx[jrow]+7];
1179: jrow++;
1180: }
1181: y[8*i] = sum1;
1182: y[8*i+1] = sum2;
1183: y[8*i+2] = sum3;
1184: y[8*i+3] = sum4;
1185: y[8*i+4] = sum5;
1186: y[8*i+5] = sum6;
1187: y[8*i+6] = sum7;
1188: y[8*i+7] = sum8;
1189: }
1191: PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1192: VecRestoreArrayRead(xx,&x);
1193: VecRestoreArray(yy,&y);
1194: return 0;
1195: }
1197: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1198: {
1199: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1200: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1201: const PetscScalar *x,*v;
1202: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1203: const PetscInt m = b->AIJ->rmap->n,*idx;
1204: PetscInt n,i;
1206: VecSet(yy,0.0);
1207: VecGetArrayRead(xx,&x);
1208: VecGetArray(yy,&y);
1210: for (i=0; i<m; i++) {
1211: idx = a->j + a->i[i];
1212: v = a->a + a->i[i];
1213: n = a->i[i+1] - a->i[i];
1214: alpha1 = x[8*i];
1215: alpha2 = x[8*i+1];
1216: alpha3 = x[8*i+2];
1217: alpha4 = x[8*i+3];
1218: alpha5 = x[8*i+4];
1219: alpha6 = x[8*i+5];
1220: alpha7 = x[8*i+6];
1221: alpha8 = x[8*i+7];
1222: while (n-->0) {
1223: y[8*(*idx)] += alpha1*(*v);
1224: y[8*(*idx)+1] += alpha2*(*v);
1225: y[8*(*idx)+2] += alpha3*(*v);
1226: y[8*(*idx)+3] += alpha4*(*v);
1227: y[8*(*idx)+4] += alpha5*(*v);
1228: y[8*(*idx)+5] += alpha6*(*v);
1229: y[8*(*idx)+6] += alpha7*(*v);
1230: y[8*(*idx)+7] += alpha8*(*v);
1231: idx++; v++;
1232: }
1233: }
1234: PetscLogFlops(16.0*a->nz);
1235: VecRestoreArrayRead(xx,&x);
1236: VecRestoreArray(yy,&y);
1237: return 0;
1238: }
1240: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1241: {
1242: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1243: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1244: const PetscScalar *x,*v;
1245: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1246: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1247: PetscInt n,i,jrow,j;
1249: if (yy != zz) VecCopy(yy,zz);
1250: VecGetArrayRead(xx,&x);
1251: VecGetArray(zz,&y);
1252: idx = a->j;
1253: v = a->a;
1254: ii = a->i;
1256: for (i=0; i<m; i++) {
1257: jrow = ii[i];
1258: n = ii[i+1] - jrow;
1259: sum1 = 0.0;
1260: sum2 = 0.0;
1261: sum3 = 0.0;
1262: sum4 = 0.0;
1263: sum5 = 0.0;
1264: sum6 = 0.0;
1265: sum7 = 0.0;
1266: sum8 = 0.0;
1267: for (j=0; j<n; j++) {
1268: sum1 += v[jrow]*x[8*idx[jrow]];
1269: sum2 += v[jrow]*x[8*idx[jrow]+1];
1270: sum3 += v[jrow]*x[8*idx[jrow]+2];
1271: sum4 += v[jrow]*x[8*idx[jrow]+3];
1272: sum5 += v[jrow]*x[8*idx[jrow]+4];
1273: sum6 += v[jrow]*x[8*idx[jrow]+5];
1274: sum7 += v[jrow]*x[8*idx[jrow]+6];
1275: sum8 += v[jrow]*x[8*idx[jrow]+7];
1276: jrow++;
1277: }
1278: y[8*i] += sum1;
1279: y[8*i+1] += sum2;
1280: y[8*i+2] += sum3;
1281: y[8*i+3] += sum4;
1282: y[8*i+4] += sum5;
1283: y[8*i+5] += sum6;
1284: y[8*i+6] += sum7;
1285: y[8*i+7] += sum8;
1286: }
1288: PetscLogFlops(16.0*a->nz);
1289: VecRestoreArrayRead(xx,&x);
1290: VecRestoreArray(zz,&y);
1291: return 0;
1292: }
1294: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1295: {
1296: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1297: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1298: const PetscScalar *x,*v;
1299: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1300: const PetscInt m = b->AIJ->rmap->n,*idx;
1301: PetscInt n,i;
1303: if (yy != zz) VecCopy(yy,zz);
1304: VecGetArrayRead(xx,&x);
1305: VecGetArray(zz,&y);
1306: for (i=0; i<m; i++) {
1307: idx = a->j + a->i[i];
1308: v = a->a + a->i[i];
1309: n = a->i[i+1] - a->i[i];
1310: alpha1 = x[8*i];
1311: alpha2 = x[8*i+1];
1312: alpha3 = x[8*i+2];
1313: alpha4 = x[8*i+3];
1314: alpha5 = x[8*i+4];
1315: alpha6 = x[8*i+5];
1316: alpha7 = x[8*i+6];
1317: alpha8 = x[8*i+7];
1318: while (n-->0) {
1319: y[8*(*idx)] += alpha1*(*v);
1320: y[8*(*idx)+1] += alpha2*(*v);
1321: y[8*(*idx)+2] += alpha3*(*v);
1322: y[8*(*idx)+3] += alpha4*(*v);
1323: y[8*(*idx)+4] += alpha5*(*v);
1324: y[8*(*idx)+5] += alpha6*(*v);
1325: y[8*(*idx)+6] += alpha7*(*v);
1326: y[8*(*idx)+7] += alpha8*(*v);
1327: idx++; v++;
1328: }
1329: }
1330: PetscLogFlops(16.0*a->nz);
1331: VecRestoreArrayRead(xx,&x);
1332: VecRestoreArray(zz,&y);
1333: return 0;
1334: }
1336: /* ------------------------------------------------------------------------------*/
1337: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1338: {
1339: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1340: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1341: const PetscScalar *x,*v;
1342: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1343: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1344: PetscInt nonzerorow=0,n,i,jrow,j;
1346: VecGetArrayRead(xx,&x);
1347: VecGetArray(yy,&y);
1348: idx = a->j;
1349: v = a->a;
1350: ii = a->i;
1352: for (i=0; i<m; i++) {
1353: jrow = ii[i];
1354: n = ii[i+1] - jrow;
1355: sum1 = 0.0;
1356: sum2 = 0.0;
1357: sum3 = 0.0;
1358: sum4 = 0.0;
1359: sum5 = 0.0;
1360: sum6 = 0.0;
1361: sum7 = 0.0;
1362: sum8 = 0.0;
1363: sum9 = 0.0;
1365: nonzerorow += (n>0);
1366: for (j=0; j<n; j++) {
1367: sum1 += v[jrow]*x[9*idx[jrow]];
1368: sum2 += v[jrow]*x[9*idx[jrow]+1];
1369: sum3 += v[jrow]*x[9*idx[jrow]+2];
1370: sum4 += v[jrow]*x[9*idx[jrow]+3];
1371: sum5 += v[jrow]*x[9*idx[jrow]+4];
1372: sum6 += v[jrow]*x[9*idx[jrow]+5];
1373: sum7 += v[jrow]*x[9*idx[jrow]+6];
1374: sum8 += v[jrow]*x[9*idx[jrow]+7];
1375: sum9 += v[jrow]*x[9*idx[jrow]+8];
1376: jrow++;
1377: }
1378: y[9*i] = sum1;
1379: y[9*i+1] = sum2;
1380: y[9*i+2] = sum3;
1381: y[9*i+3] = sum4;
1382: y[9*i+4] = sum5;
1383: y[9*i+5] = sum6;
1384: y[9*i+6] = sum7;
1385: y[9*i+7] = sum8;
1386: y[9*i+8] = sum9;
1387: }
1389: PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1390: VecRestoreArrayRead(xx,&x);
1391: VecRestoreArray(yy,&y);
1392: return 0;
1393: }
1395: /* ------------------------------------------------------------------------------*/
1397: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1398: {
1399: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1400: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1401: const PetscScalar *x,*v;
1402: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1403: const PetscInt m = b->AIJ->rmap->n,*idx;
1404: PetscInt n,i;
1406: VecSet(yy,0.0);
1407: VecGetArrayRead(xx,&x);
1408: VecGetArray(yy,&y);
1410: for (i=0; i<m; i++) {
1411: idx = a->j + a->i[i];
1412: v = a->a + a->i[i];
1413: n = a->i[i+1] - a->i[i];
1414: alpha1 = x[9*i];
1415: alpha2 = x[9*i+1];
1416: alpha3 = x[9*i+2];
1417: alpha4 = x[9*i+3];
1418: alpha5 = x[9*i+4];
1419: alpha6 = x[9*i+5];
1420: alpha7 = x[9*i+6];
1421: alpha8 = x[9*i+7];
1422: alpha9 = x[9*i+8];
1423: while (n-->0) {
1424: y[9*(*idx)] += alpha1*(*v);
1425: y[9*(*idx)+1] += alpha2*(*v);
1426: y[9*(*idx)+2] += alpha3*(*v);
1427: y[9*(*idx)+3] += alpha4*(*v);
1428: y[9*(*idx)+4] += alpha5*(*v);
1429: y[9*(*idx)+5] += alpha6*(*v);
1430: y[9*(*idx)+6] += alpha7*(*v);
1431: y[9*(*idx)+7] += alpha8*(*v);
1432: y[9*(*idx)+8] += alpha9*(*v);
1433: idx++; v++;
1434: }
1435: }
1436: PetscLogFlops(18.0*a->nz);
1437: VecRestoreArrayRead(xx,&x);
1438: VecRestoreArray(yy,&y);
1439: return 0;
1440: }
1442: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1443: {
1444: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1445: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1446: const PetscScalar *x,*v;
1447: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1448: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1449: PetscInt n,i,jrow,j;
1451: if (yy != zz) VecCopy(yy,zz);
1452: VecGetArrayRead(xx,&x);
1453: VecGetArray(zz,&y);
1454: idx = a->j;
1455: v = a->a;
1456: ii = a->i;
1458: for (i=0; i<m; i++) {
1459: jrow = ii[i];
1460: n = ii[i+1] - jrow;
1461: sum1 = 0.0;
1462: sum2 = 0.0;
1463: sum3 = 0.0;
1464: sum4 = 0.0;
1465: sum5 = 0.0;
1466: sum6 = 0.0;
1467: sum7 = 0.0;
1468: sum8 = 0.0;
1469: sum9 = 0.0;
1470: for (j=0; j<n; j++) {
1471: sum1 += v[jrow]*x[9*idx[jrow]];
1472: sum2 += v[jrow]*x[9*idx[jrow]+1];
1473: sum3 += v[jrow]*x[9*idx[jrow]+2];
1474: sum4 += v[jrow]*x[9*idx[jrow]+3];
1475: sum5 += v[jrow]*x[9*idx[jrow]+4];
1476: sum6 += v[jrow]*x[9*idx[jrow]+5];
1477: sum7 += v[jrow]*x[9*idx[jrow]+6];
1478: sum8 += v[jrow]*x[9*idx[jrow]+7];
1479: sum9 += v[jrow]*x[9*idx[jrow]+8];
1480: jrow++;
1481: }
1482: y[9*i] += sum1;
1483: y[9*i+1] += sum2;
1484: y[9*i+2] += sum3;
1485: y[9*i+3] += sum4;
1486: y[9*i+4] += sum5;
1487: y[9*i+5] += sum6;
1488: y[9*i+6] += sum7;
1489: y[9*i+7] += sum8;
1490: y[9*i+8] += sum9;
1491: }
1493: PetscLogFlops(18.0*a->nz);
1494: VecRestoreArrayRead(xx,&x);
1495: VecRestoreArray(zz,&y);
1496: return 0;
1497: }
1499: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1500: {
1501: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1502: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1503: const PetscScalar *x,*v;
1504: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1505: const PetscInt m = b->AIJ->rmap->n,*idx;
1506: PetscInt n,i;
1508: if (yy != zz) VecCopy(yy,zz);
1509: VecGetArrayRead(xx,&x);
1510: VecGetArray(zz,&y);
1511: for (i=0; i<m; i++) {
1512: idx = a->j + a->i[i];
1513: v = a->a + a->i[i];
1514: n = a->i[i+1] - a->i[i];
1515: alpha1 = x[9*i];
1516: alpha2 = x[9*i+1];
1517: alpha3 = x[9*i+2];
1518: alpha4 = x[9*i+3];
1519: alpha5 = x[9*i+4];
1520: alpha6 = x[9*i+5];
1521: alpha7 = x[9*i+6];
1522: alpha8 = x[9*i+7];
1523: alpha9 = x[9*i+8];
1524: while (n-->0) {
1525: y[9*(*idx)] += alpha1*(*v);
1526: y[9*(*idx)+1] += alpha2*(*v);
1527: y[9*(*idx)+2] += alpha3*(*v);
1528: y[9*(*idx)+3] += alpha4*(*v);
1529: y[9*(*idx)+4] += alpha5*(*v);
1530: y[9*(*idx)+5] += alpha6*(*v);
1531: y[9*(*idx)+6] += alpha7*(*v);
1532: y[9*(*idx)+7] += alpha8*(*v);
1533: y[9*(*idx)+8] += alpha9*(*v);
1534: idx++; v++;
1535: }
1536: }
1537: PetscLogFlops(18.0*a->nz);
1538: VecRestoreArrayRead(xx,&x);
1539: VecRestoreArray(zz,&y);
1540: return 0;
1541: }
1542: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1543: {
1544: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1545: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1546: const PetscScalar *x,*v;
1547: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1548: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1549: PetscInt nonzerorow=0,n,i,jrow,j;
1551: VecGetArrayRead(xx,&x);
1552: VecGetArray(yy,&y);
1553: idx = a->j;
1554: v = a->a;
1555: ii = a->i;
1557: for (i=0; i<m; i++) {
1558: jrow = ii[i];
1559: n = ii[i+1] - jrow;
1560: sum1 = 0.0;
1561: sum2 = 0.0;
1562: sum3 = 0.0;
1563: sum4 = 0.0;
1564: sum5 = 0.0;
1565: sum6 = 0.0;
1566: sum7 = 0.0;
1567: sum8 = 0.0;
1568: sum9 = 0.0;
1569: sum10 = 0.0;
1571: nonzerorow += (n>0);
1572: for (j=0; j<n; j++) {
1573: sum1 += v[jrow]*x[10*idx[jrow]];
1574: sum2 += v[jrow]*x[10*idx[jrow]+1];
1575: sum3 += v[jrow]*x[10*idx[jrow]+2];
1576: sum4 += v[jrow]*x[10*idx[jrow]+3];
1577: sum5 += v[jrow]*x[10*idx[jrow]+4];
1578: sum6 += v[jrow]*x[10*idx[jrow]+5];
1579: sum7 += v[jrow]*x[10*idx[jrow]+6];
1580: sum8 += v[jrow]*x[10*idx[jrow]+7];
1581: sum9 += v[jrow]*x[10*idx[jrow]+8];
1582: sum10 += v[jrow]*x[10*idx[jrow]+9];
1583: jrow++;
1584: }
1585: y[10*i] = sum1;
1586: y[10*i+1] = sum2;
1587: y[10*i+2] = sum3;
1588: y[10*i+3] = sum4;
1589: y[10*i+4] = sum5;
1590: y[10*i+5] = sum6;
1591: y[10*i+6] = sum7;
1592: y[10*i+7] = sum8;
1593: y[10*i+8] = sum9;
1594: y[10*i+9] = sum10;
1595: }
1597: PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1598: VecRestoreArrayRead(xx,&x);
1599: VecRestoreArray(yy,&y);
1600: return 0;
1601: }
1603: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1604: {
1605: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1606: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1607: const PetscScalar *x,*v;
1608: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1609: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1610: PetscInt n,i,jrow,j;
1612: if (yy != zz) VecCopy(yy,zz);
1613: VecGetArrayRead(xx,&x);
1614: VecGetArray(zz,&y);
1615: idx = a->j;
1616: v = a->a;
1617: ii = a->i;
1619: for (i=0; i<m; i++) {
1620: jrow = ii[i];
1621: n = ii[i+1] - jrow;
1622: sum1 = 0.0;
1623: sum2 = 0.0;
1624: sum3 = 0.0;
1625: sum4 = 0.0;
1626: sum5 = 0.0;
1627: sum6 = 0.0;
1628: sum7 = 0.0;
1629: sum8 = 0.0;
1630: sum9 = 0.0;
1631: sum10 = 0.0;
1632: for (j=0; j<n; j++) {
1633: sum1 += v[jrow]*x[10*idx[jrow]];
1634: sum2 += v[jrow]*x[10*idx[jrow]+1];
1635: sum3 += v[jrow]*x[10*idx[jrow]+2];
1636: sum4 += v[jrow]*x[10*idx[jrow]+3];
1637: sum5 += v[jrow]*x[10*idx[jrow]+4];
1638: sum6 += v[jrow]*x[10*idx[jrow]+5];
1639: sum7 += v[jrow]*x[10*idx[jrow]+6];
1640: sum8 += v[jrow]*x[10*idx[jrow]+7];
1641: sum9 += v[jrow]*x[10*idx[jrow]+8];
1642: sum10 += v[jrow]*x[10*idx[jrow]+9];
1643: jrow++;
1644: }
1645: y[10*i] += sum1;
1646: y[10*i+1] += sum2;
1647: y[10*i+2] += sum3;
1648: y[10*i+3] += sum4;
1649: y[10*i+4] += sum5;
1650: y[10*i+5] += sum6;
1651: y[10*i+6] += sum7;
1652: y[10*i+7] += sum8;
1653: y[10*i+8] += sum9;
1654: y[10*i+9] += sum10;
1655: }
1657: PetscLogFlops(20.0*a->nz);
1658: VecRestoreArrayRead(xx,&x);
1659: VecRestoreArray(yy,&y);
1660: return 0;
1661: }
1663: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1664: {
1665: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1666: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1667: const PetscScalar *x,*v;
1668: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1669: const PetscInt m = b->AIJ->rmap->n,*idx;
1670: PetscInt n,i;
1672: VecSet(yy,0.0);
1673: VecGetArrayRead(xx,&x);
1674: VecGetArray(yy,&y);
1676: for (i=0; i<m; i++) {
1677: idx = a->j + a->i[i];
1678: v = a->a + a->i[i];
1679: n = a->i[i+1] - a->i[i];
1680: alpha1 = x[10*i];
1681: alpha2 = x[10*i+1];
1682: alpha3 = x[10*i+2];
1683: alpha4 = x[10*i+3];
1684: alpha5 = x[10*i+4];
1685: alpha6 = x[10*i+5];
1686: alpha7 = x[10*i+6];
1687: alpha8 = x[10*i+7];
1688: alpha9 = x[10*i+8];
1689: alpha10 = x[10*i+9];
1690: while (n-->0) {
1691: y[10*(*idx)] += alpha1*(*v);
1692: y[10*(*idx)+1] += alpha2*(*v);
1693: y[10*(*idx)+2] += alpha3*(*v);
1694: y[10*(*idx)+3] += alpha4*(*v);
1695: y[10*(*idx)+4] += alpha5*(*v);
1696: y[10*(*idx)+5] += alpha6*(*v);
1697: y[10*(*idx)+6] += alpha7*(*v);
1698: y[10*(*idx)+7] += alpha8*(*v);
1699: y[10*(*idx)+8] += alpha9*(*v);
1700: y[10*(*idx)+9] += alpha10*(*v);
1701: idx++; v++;
1702: }
1703: }
1704: PetscLogFlops(20.0*a->nz);
1705: VecRestoreArrayRead(xx,&x);
1706: VecRestoreArray(yy,&y);
1707: return 0;
1708: }
1710: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1711: {
1712: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1713: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1714: const PetscScalar *x,*v;
1715: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1716: const PetscInt m = b->AIJ->rmap->n,*idx;
1717: PetscInt n,i;
1719: if (yy != zz) VecCopy(yy,zz);
1720: VecGetArrayRead(xx,&x);
1721: VecGetArray(zz,&y);
1722: for (i=0; i<m; i++) {
1723: idx = a->j + a->i[i];
1724: v = a->a + a->i[i];
1725: n = a->i[i+1] - a->i[i];
1726: alpha1 = x[10*i];
1727: alpha2 = x[10*i+1];
1728: alpha3 = x[10*i+2];
1729: alpha4 = x[10*i+3];
1730: alpha5 = x[10*i+4];
1731: alpha6 = x[10*i+5];
1732: alpha7 = x[10*i+6];
1733: alpha8 = x[10*i+7];
1734: alpha9 = x[10*i+8];
1735: alpha10 = x[10*i+9];
1736: while (n-->0) {
1737: y[10*(*idx)] += alpha1*(*v);
1738: y[10*(*idx)+1] += alpha2*(*v);
1739: y[10*(*idx)+2] += alpha3*(*v);
1740: y[10*(*idx)+3] += alpha4*(*v);
1741: y[10*(*idx)+4] += alpha5*(*v);
1742: y[10*(*idx)+5] += alpha6*(*v);
1743: y[10*(*idx)+6] += alpha7*(*v);
1744: y[10*(*idx)+7] += alpha8*(*v);
1745: y[10*(*idx)+8] += alpha9*(*v);
1746: y[10*(*idx)+9] += alpha10*(*v);
1747: idx++; v++;
1748: }
1749: }
1750: PetscLogFlops(20.0*a->nz);
1751: VecRestoreArrayRead(xx,&x);
1752: VecRestoreArray(zz,&y);
1753: return 0;
1754: }
1756: /*--------------------------------------------------------------------------------------------*/
1757: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1758: {
1759: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1760: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1761: const PetscScalar *x,*v;
1762: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1763: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1764: PetscInt nonzerorow=0,n,i,jrow,j;
1766: VecGetArrayRead(xx,&x);
1767: VecGetArray(yy,&y);
1768: idx = a->j;
1769: v = a->a;
1770: ii = a->i;
1772: for (i=0; i<m; i++) {
1773: jrow = ii[i];
1774: n = ii[i+1] - jrow;
1775: sum1 = 0.0;
1776: sum2 = 0.0;
1777: sum3 = 0.0;
1778: sum4 = 0.0;
1779: sum5 = 0.0;
1780: sum6 = 0.0;
1781: sum7 = 0.0;
1782: sum8 = 0.0;
1783: sum9 = 0.0;
1784: sum10 = 0.0;
1785: sum11 = 0.0;
1787: nonzerorow += (n>0);
1788: for (j=0; j<n; j++) {
1789: sum1 += v[jrow]*x[11*idx[jrow]];
1790: sum2 += v[jrow]*x[11*idx[jrow]+1];
1791: sum3 += v[jrow]*x[11*idx[jrow]+2];
1792: sum4 += v[jrow]*x[11*idx[jrow]+3];
1793: sum5 += v[jrow]*x[11*idx[jrow]+4];
1794: sum6 += v[jrow]*x[11*idx[jrow]+5];
1795: sum7 += v[jrow]*x[11*idx[jrow]+6];
1796: sum8 += v[jrow]*x[11*idx[jrow]+7];
1797: sum9 += v[jrow]*x[11*idx[jrow]+8];
1798: sum10 += v[jrow]*x[11*idx[jrow]+9];
1799: sum11 += v[jrow]*x[11*idx[jrow]+10];
1800: jrow++;
1801: }
1802: y[11*i] = sum1;
1803: y[11*i+1] = sum2;
1804: y[11*i+2] = sum3;
1805: y[11*i+3] = sum4;
1806: y[11*i+4] = sum5;
1807: y[11*i+5] = sum6;
1808: y[11*i+6] = sum7;
1809: y[11*i+7] = sum8;
1810: y[11*i+8] = sum9;
1811: y[11*i+9] = sum10;
1812: y[11*i+10] = sum11;
1813: }
1815: PetscLogFlops(22.0*a->nz - 11*nonzerorow);
1816: VecRestoreArrayRead(xx,&x);
1817: VecRestoreArray(yy,&y);
1818: return 0;
1819: }
1821: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1822: {
1823: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1824: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1825: const PetscScalar *x,*v;
1826: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1827: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1828: PetscInt n,i,jrow,j;
1830: if (yy != zz) VecCopy(yy,zz);
1831: VecGetArrayRead(xx,&x);
1832: VecGetArray(zz,&y);
1833: idx = a->j;
1834: v = a->a;
1835: ii = a->i;
1837: for (i=0; i<m; i++) {
1838: jrow = ii[i];
1839: n = ii[i+1] - jrow;
1840: sum1 = 0.0;
1841: sum2 = 0.0;
1842: sum3 = 0.0;
1843: sum4 = 0.0;
1844: sum5 = 0.0;
1845: sum6 = 0.0;
1846: sum7 = 0.0;
1847: sum8 = 0.0;
1848: sum9 = 0.0;
1849: sum10 = 0.0;
1850: sum11 = 0.0;
1851: for (j=0; j<n; j++) {
1852: sum1 += v[jrow]*x[11*idx[jrow]];
1853: sum2 += v[jrow]*x[11*idx[jrow]+1];
1854: sum3 += v[jrow]*x[11*idx[jrow]+2];
1855: sum4 += v[jrow]*x[11*idx[jrow]+3];
1856: sum5 += v[jrow]*x[11*idx[jrow]+4];
1857: sum6 += v[jrow]*x[11*idx[jrow]+5];
1858: sum7 += v[jrow]*x[11*idx[jrow]+6];
1859: sum8 += v[jrow]*x[11*idx[jrow]+7];
1860: sum9 += v[jrow]*x[11*idx[jrow]+8];
1861: sum10 += v[jrow]*x[11*idx[jrow]+9];
1862: sum11 += v[jrow]*x[11*idx[jrow]+10];
1863: jrow++;
1864: }
1865: y[11*i] += sum1;
1866: y[11*i+1] += sum2;
1867: y[11*i+2] += sum3;
1868: y[11*i+3] += sum4;
1869: y[11*i+4] += sum5;
1870: y[11*i+5] += sum6;
1871: y[11*i+6] += sum7;
1872: y[11*i+7] += sum8;
1873: y[11*i+8] += sum9;
1874: y[11*i+9] += sum10;
1875: y[11*i+10] += sum11;
1876: }
1878: PetscLogFlops(22.0*a->nz);
1879: VecRestoreArrayRead(xx,&x);
1880: VecRestoreArray(yy,&y);
1881: return 0;
1882: }
1884: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1885: {
1886: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1887: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1888: const PetscScalar *x,*v;
1889: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1890: const PetscInt m = b->AIJ->rmap->n,*idx;
1891: PetscInt n,i;
1893: VecSet(yy,0.0);
1894: VecGetArrayRead(xx,&x);
1895: VecGetArray(yy,&y);
1897: for (i=0; i<m; i++) {
1898: idx = a->j + a->i[i];
1899: v = a->a + a->i[i];
1900: n = a->i[i+1] - a->i[i];
1901: alpha1 = x[11*i];
1902: alpha2 = x[11*i+1];
1903: alpha3 = x[11*i+2];
1904: alpha4 = x[11*i+3];
1905: alpha5 = x[11*i+4];
1906: alpha6 = x[11*i+5];
1907: alpha7 = x[11*i+6];
1908: alpha8 = x[11*i+7];
1909: alpha9 = x[11*i+8];
1910: alpha10 = x[11*i+9];
1911: alpha11 = x[11*i+10];
1912: while (n-->0) {
1913: y[11*(*idx)] += alpha1*(*v);
1914: y[11*(*idx)+1] += alpha2*(*v);
1915: y[11*(*idx)+2] += alpha3*(*v);
1916: y[11*(*idx)+3] += alpha4*(*v);
1917: y[11*(*idx)+4] += alpha5*(*v);
1918: y[11*(*idx)+5] += alpha6*(*v);
1919: y[11*(*idx)+6] += alpha7*(*v);
1920: y[11*(*idx)+7] += alpha8*(*v);
1921: y[11*(*idx)+8] += alpha9*(*v);
1922: y[11*(*idx)+9] += alpha10*(*v);
1923: y[11*(*idx)+10] += alpha11*(*v);
1924: idx++; v++;
1925: }
1926: }
1927: PetscLogFlops(22.0*a->nz);
1928: VecRestoreArrayRead(xx,&x);
1929: VecRestoreArray(yy,&y);
1930: return 0;
1931: }
1933: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1934: {
1935: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1936: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1937: const PetscScalar *x,*v;
1938: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1939: const PetscInt m = b->AIJ->rmap->n,*idx;
1940: PetscInt n,i;
1942: if (yy != zz) VecCopy(yy,zz);
1943: VecGetArrayRead(xx,&x);
1944: VecGetArray(zz,&y);
1945: for (i=0; i<m; i++) {
1946: idx = a->j + a->i[i];
1947: v = a->a + a->i[i];
1948: n = a->i[i+1] - a->i[i];
1949: alpha1 = x[11*i];
1950: alpha2 = x[11*i+1];
1951: alpha3 = x[11*i+2];
1952: alpha4 = x[11*i+3];
1953: alpha5 = x[11*i+4];
1954: alpha6 = x[11*i+5];
1955: alpha7 = x[11*i+6];
1956: alpha8 = x[11*i+7];
1957: alpha9 = x[11*i+8];
1958: alpha10 = x[11*i+9];
1959: alpha11 = x[11*i+10];
1960: while (n-->0) {
1961: y[11*(*idx)] += alpha1*(*v);
1962: y[11*(*idx)+1] += alpha2*(*v);
1963: y[11*(*idx)+2] += alpha3*(*v);
1964: y[11*(*idx)+3] += alpha4*(*v);
1965: y[11*(*idx)+4] += alpha5*(*v);
1966: y[11*(*idx)+5] += alpha6*(*v);
1967: y[11*(*idx)+6] += alpha7*(*v);
1968: y[11*(*idx)+7] += alpha8*(*v);
1969: y[11*(*idx)+8] += alpha9*(*v);
1970: y[11*(*idx)+9] += alpha10*(*v);
1971: y[11*(*idx)+10] += alpha11*(*v);
1972: idx++; v++;
1973: }
1974: }
1975: PetscLogFlops(22.0*a->nz);
1976: VecRestoreArrayRead(xx,&x);
1977: VecRestoreArray(zz,&y);
1978: return 0;
1979: }
1981: /*--------------------------------------------------------------------------------------------*/
1982: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1983: {
1984: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1985: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1986: const PetscScalar *x,*v;
1987: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1988: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1989: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1990: PetscInt nonzerorow=0,n,i,jrow,j;
1992: VecGetArrayRead(xx,&x);
1993: VecGetArray(yy,&y);
1994: idx = a->j;
1995: v = a->a;
1996: ii = a->i;
1998: for (i=0; i<m; i++) {
1999: jrow = ii[i];
2000: n = ii[i+1] - jrow;
2001: sum1 = 0.0;
2002: sum2 = 0.0;
2003: sum3 = 0.0;
2004: sum4 = 0.0;
2005: sum5 = 0.0;
2006: sum6 = 0.0;
2007: sum7 = 0.0;
2008: sum8 = 0.0;
2009: sum9 = 0.0;
2010: sum10 = 0.0;
2011: sum11 = 0.0;
2012: sum12 = 0.0;
2013: sum13 = 0.0;
2014: sum14 = 0.0;
2015: sum15 = 0.0;
2016: sum16 = 0.0;
2018: nonzerorow += (n>0);
2019: for (j=0; j<n; j++) {
2020: sum1 += v[jrow]*x[16*idx[jrow]];
2021: sum2 += v[jrow]*x[16*idx[jrow]+1];
2022: sum3 += v[jrow]*x[16*idx[jrow]+2];
2023: sum4 += v[jrow]*x[16*idx[jrow]+3];
2024: sum5 += v[jrow]*x[16*idx[jrow]+4];
2025: sum6 += v[jrow]*x[16*idx[jrow]+5];
2026: sum7 += v[jrow]*x[16*idx[jrow]+6];
2027: sum8 += v[jrow]*x[16*idx[jrow]+7];
2028: sum9 += v[jrow]*x[16*idx[jrow]+8];
2029: sum10 += v[jrow]*x[16*idx[jrow]+9];
2030: sum11 += v[jrow]*x[16*idx[jrow]+10];
2031: sum12 += v[jrow]*x[16*idx[jrow]+11];
2032: sum13 += v[jrow]*x[16*idx[jrow]+12];
2033: sum14 += v[jrow]*x[16*idx[jrow]+13];
2034: sum15 += v[jrow]*x[16*idx[jrow]+14];
2035: sum16 += v[jrow]*x[16*idx[jrow]+15];
2036: jrow++;
2037: }
2038: y[16*i] = sum1;
2039: y[16*i+1] = sum2;
2040: y[16*i+2] = sum3;
2041: y[16*i+3] = sum4;
2042: y[16*i+4] = sum5;
2043: y[16*i+5] = sum6;
2044: y[16*i+6] = sum7;
2045: y[16*i+7] = sum8;
2046: y[16*i+8] = sum9;
2047: y[16*i+9] = sum10;
2048: y[16*i+10] = sum11;
2049: y[16*i+11] = sum12;
2050: y[16*i+12] = sum13;
2051: y[16*i+13] = sum14;
2052: y[16*i+14] = sum15;
2053: y[16*i+15] = sum16;
2054: }
2056: PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2057: VecRestoreArrayRead(xx,&x);
2058: VecRestoreArray(yy,&y);
2059: return 0;
2060: }
2062: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2063: {
2064: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2065: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2066: const PetscScalar *x,*v;
2067: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2068: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2069: const PetscInt m = b->AIJ->rmap->n,*idx;
2070: PetscInt n,i;
2072: VecSet(yy,0.0);
2073: VecGetArrayRead(xx,&x);
2074: VecGetArray(yy,&y);
2076: for (i=0; i<m; i++) {
2077: idx = a->j + a->i[i];
2078: v = a->a + a->i[i];
2079: n = a->i[i+1] - a->i[i];
2080: alpha1 = x[16*i];
2081: alpha2 = x[16*i+1];
2082: alpha3 = x[16*i+2];
2083: alpha4 = x[16*i+3];
2084: alpha5 = x[16*i+4];
2085: alpha6 = x[16*i+5];
2086: alpha7 = x[16*i+6];
2087: alpha8 = x[16*i+7];
2088: alpha9 = x[16*i+8];
2089: alpha10 = x[16*i+9];
2090: alpha11 = x[16*i+10];
2091: alpha12 = x[16*i+11];
2092: alpha13 = x[16*i+12];
2093: alpha14 = x[16*i+13];
2094: alpha15 = x[16*i+14];
2095: alpha16 = x[16*i+15];
2096: while (n-->0) {
2097: y[16*(*idx)] += alpha1*(*v);
2098: y[16*(*idx)+1] += alpha2*(*v);
2099: y[16*(*idx)+2] += alpha3*(*v);
2100: y[16*(*idx)+3] += alpha4*(*v);
2101: y[16*(*idx)+4] += alpha5*(*v);
2102: y[16*(*idx)+5] += alpha6*(*v);
2103: y[16*(*idx)+6] += alpha7*(*v);
2104: y[16*(*idx)+7] += alpha8*(*v);
2105: y[16*(*idx)+8] += alpha9*(*v);
2106: y[16*(*idx)+9] += alpha10*(*v);
2107: y[16*(*idx)+10] += alpha11*(*v);
2108: y[16*(*idx)+11] += alpha12*(*v);
2109: y[16*(*idx)+12] += alpha13*(*v);
2110: y[16*(*idx)+13] += alpha14*(*v);
2111: y[16*(*idx)+14] += alpha15*(*v);
2112: y[16*(*idx)+15] += alpha16*(*v);
2113: idx++; v++;
2114: }
2115: }
2116: PetscLogFlops(32.0*a->nz);
2117: VecRestoreArrayRead(xx,&x);
2118: VecRestoreArray(yy,&y);
2119: return 0;
2120: }
2122: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2123: {
2124: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2125: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2126: const PetscScalar *x,*v;
2127: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2128: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2129: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2130: PetscInt n,i,jrow,j;
2132: if (yy != zz) VecCopy(yy,zz);
2133: VecGetArrayRead(xx,&x);
2134: VecGetArray(zz,&y);
2135: idx = a->j;
2136: v = a->a;
2137: ii = a->i;
2139: for (i=0; i<m; i++) {
2140: jrow = ii[i];
2141: n = ii[i+1] - jrow;
2142: sum1 = 0.0;
2143: sum2 = 0.0;
2144: sum3 = 0.0;
2145: sum4 = 0.0;
2146: sum5 = 0.0;
2147: sum6 = 0.0;
2148: sum7 = 0.0;
2149: sum8 = 0.0;
2150: sum9 = 0.0;
2151: sum10 = 0.0;
2152: sum11 = 0.0;
2153: sum12 = 0.0;
2154: sum13 = 0.0;
2155: sum14 = 0.0;
2156: sum15 = 0.0;
2157: sum16 = 0.0;
2158: for (j=0; j<n; j++) {
2159: sum1 += v[jrow]*x[16*idx[jrow]];
2160: sum2 += v[jrow]*x[16*idx[jrow]+1];
2161: sum3 += v[jrow]*x[16*idx[jrow]+2];
2162: sum4 += v[jrow]*x[16*idx[jrow]+3];
2163: sum5 += v[jrow]*x[16*idx[jrow]+4];
2164: sum6 += v[jrow]*x[16*idx[jrow]+5];
2165: sum7 += v[jrow]*x[16*idx[jrow]+6];
2166: sum8 += v[jrow]*x[16*idx[jrow]+7];
2167: sum9 += v[jrow]*x[16*idx[jrow]+8];
2168: sum10 += v[jrow]*x[16*idx[jrow]+9];
2169: sum11 += v[jrow]*x[16*idx[jrow]+10];
2170: sum12 += v[jrow]*x[16*idx[jrow]+11];
2171: sum13 += v[jrow]*x[16*idx[jrow]+12];
2172: sum14 += v[jrow]*x[16*idx[jrow]+13];
2173: sum15 += v[jrow]*x[16*idx[jrow]+14];
2174: sum16 += v[jrow]*x[16*idx[jrow]+15];
2175: jrow++;
2176: }
2177: y[16*i] += sum1;
2178: y[16*i+1] += sum2;
2179: y[16*i+2] += sum3;
2180: y[16*i+3] += sum4;
2181: y[16*i+4] += sum5;
2182: y[16*i+5] += sum6;
2183: y[16*i+6] += sum7;
2184: y[16*i+7] += sum8;
2185: y[16*i+8] += sum9;
2186: y[16*i+9] += sum10;
2187: y[16*i+10] += sum11;
2188: y[16*i+11] += sum12;
2189: y[16*i+12] += sum13;
2190: y[16*i+13] += sum14;
2191: y[16*i+14] += sum15;
2192: y[16*i+15] += sum16;
2193: }
2195: PetscLogFlops(32.0*a->nz);
2196: VecRestoreArrayRead(xx,&x);
2197: VecRestoreArray(zz,&y);
2198: return 0;
2199: }
2201: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2202: {
2203: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2204: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2205: const PetscScalar *x,*v;
2206: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2207: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2208: const PetscInt m = b->AIJ->rmap->n,*idx;
2209: PetscInt n,i;
2211: if (yy != zz) VecCopy(yy,zz);
2212: VecGetArrayRead(xx,&x);
2213: VecGetArray(zz,&y);
2214: for (i=0; i<m; i++) {
2215: idx = a->j + a->i[i];
2216: v = a->a + a->i[i];
2217: n = a->i[i+1] - a->i[i];
2218: alpha1 = x[16*i];
2219: alpha2 = x[16*i+1];
2220: alpha3 = x[16*i+2];
2221: alpha4 = x[16*i+3];
2222: alpha5 = x[16*i+4];
2223: alpha6 = x[16*i+5];
2224: alpha7 = x[16*i+6];
2225: alpha8 = x[16*i+7];
2226: alpha9 = x[16*i+8];
2227: alpha10 = x[16*i+9];
2228: alpha11 = x[16*i+10];
2229: alpha12 = x[16*i+11];
2230: alpha13 = x[16*i+12];
2231: alpha14 = x[16*i+13];
2232: alpha15 = x[16*i+14];
2233: alpha16 = x[16*i+15];
2234: while (n-->0) {
2235: y[16*(*idx)] += alpha1*(*v);
2236: y[16*(*idx)+1] += alpha2*(*v);
2237: y[16*(*idx)+2] += alpha3*(*v);
2238: y[16*(*idx)+3] += alpha4*(*v);
2239: y[16*(*idx)+4] += alpha5*(*v);
2240: y[16*(*idx)+5] += alpha6*(*v);
2241: y[16*(*idx)+6] += alpha7*(*v);
2242: y[16*(*idx)+7] += alpha8*(*v);
2243: y[16*(*idx)+8] += alpha9*(*v);
2244: y[16*(*idx)+9] += alpha10*(*v);
2245: y[16*(*idx)+10] += alpha11*(*v);
2246: y[16*(*idx)+11] += alpha12*(*v);
2247: y[16*(*idx)+12] += alpha13*(*v);
2248: y[16*(*idx)+13] += alpha14*(*v);
2249: y[16*(*idx)+14] += alpha15*(*v);
2250: y[16*(*idx)+15] += alpha16*(*v);
2251: idx++; v++;
2252: }
2253: }
2254: PetscLogFlops(32.0*a->nz);
2255: VecRestoreArrayRead(xx,&x);
2256: VecRestoreArray(zz,&y);
2257: return 0;
2258: }
2260: /*--------------------------------------------------------------------------------------------*/
2261: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2262: {
2263: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2264: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2265: const PetscScalar *x,*v;
2266: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2267: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2268: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2269: PetscInt nonzerorow=0,n,i,jrow,j;
2271: VecGetArrayRead(xx,&x);
2272: VecGetArray(yy,&y);
2273: idx = a->j;
2274: v = a->a;
2275: ii = a->i;
2277: for (i=0; i<m; i++) {
2278: jrow = ii[i];
2279: n = ii[i+1] - jrow;
2280: sum1 = 0.0;
2281: sum2 = 0.0;
2282: sum3 = 0.0;
2283: sum4 = 0.0;
2284: sum5 = 0.0;
2285: sum6 = 0.0;
2286: sum7 = 0.0;
2287: sum8 = 0.0;
2288: sum9 = 0.0;
2289: sum10 = 0.0;
2290: sum11 = 0.0;
2291: sum12 = 0.0;
2292: sum13 = 0.0;
2293: sum14 = 0.0;
2294: sum15 = 0.0;
2295: sum16 = 0.0;
2296: sum17 = 0.0;
2297: sum18 = 0.0;
2299: nonzerorow += (n>0);
2300: for (j=0; j<n; j++) {
2301: sum1 += v[jrow]*x[18*idx[jrow]];
2302: sum2 += v[jrow]*x[18*idx[jrow]+1];
2303: sum3 += v[jrow]*x[18*idx[jrow]+2];
2304: sum4 += v[jrow]*x[18*idx[jrow]+3];
2305: sum5 += v[jrow]*x[18*idx[jrow]+4];
2306: sum6 += v[jrow]*x[18*idx[jrow]+5];
2307: sum7 += v[jrow]*x[18*idx[jrow]+6];
2308: sum8 += v[jrow]*x[18*idx[jrow]+7];
2309: sum9 += v[jrow]*x[18*idx[jrow]+8];
2310: sum10 += v[jrow]*x[18*idx[jrow]+9];
2311: sum11 += v[jrow]*x[18*idx[jrow]+10];
2312: sum12 += v[jrow]*x[18*idx[jrow]+11];
2313: sum13 += v[jrow]*x[18*idx[jrow]+12];
2314: sum14 += v[jrow]*x[18*idx[jrow]+13];
2315: sum15 += v[jrow]*x[18*idx[jrow]+14];
2316: sum16 += v[jrow]*x[18*idx[jrow]+15];
2317: sum17 += v[jrow]*x[18*idx[jrow]+16];
2318: sum18 += v[jrow]*x[18*idx[jrow]+17];
2319: jrow++;
2320: }
2321: y[18*i] = sum1;
2322: y[18*i+1] = sum2;
2323: y[18*i+2] = sum3;
2324: y[18*i+3] = sum4;
2325: y[18*i+4] = sum5;
2326: y[18*i+5] = sum6;
2327: y[18*i+6] = sum7;
2328: y[18*i+7] = sum8;
2329: y[18*i+8] = sum9;
2330: y[18*i+9] = sum10;
2331: y[18*i+10] = sum11;
2332: y[18*i+11] = sum12;
2333: y[18*i+12] = sum13;
2334: y[18*i+13] = sum14;
2335: y[18*i+14] = sum15;
2336: y[18*i+15] = sum16;
2337: y[18*i+16] = sum17;
2338: y[18*i+17] = sum18;
2339: }
2341: PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2342: VecRestoreArrayRead(xx,&x);
2343: VecRestoreArray(yy,&y);
2344: return 0;
2345: }
2347: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2348: {
2349: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2350: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2351: const PetscScalar *x,*v;
2352: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2353: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2354: const PetscInt m = b->AIJ->rmap->n,*idx;
2355: PetscInt n,i;
2357: VecSet(yy,0.0);
2358: VecGetArrayRead(xx,&x);
2359: VecGetArray(yy,&y);
2361: for (i=0; i<m; i++) {
2362: idx = a->j + a->i[i];
2363: v = a->a + a->i[i];
2364: n = a->i[i+1] - a->i[i];
2365: alpha1 = x[18*i];
2366: alpha2 = x[18*i+1];
2367: alpha3 = x[18*i+2];
2368: alpha4 = x[18*i+3];
2369: alpha5 = x[18*i+4];
2370: alpha6 = x[18*i+5];
2371: alpha7 = x[18*i+6];
2372: alpha8 = x[18*i+7];
2373: alpha9 = x[18*i+8];
2374: alpha10 = x[18*i+9];
2375: alpha11 = x[18*i+10];
2376: alpha12 = x[18*i+11];
2377: alpha13 = x[18*i+12];
2378: alpha14 = x[18*i+13];
2379: alpha15 = x[18*i+14];
2380: alpha16 = x[18*i+15];
2381: alpha17 = x[18*i+16];
2382: alpha18 = x[18*i+17];
2383: while (n-->0) {
2384: y[18*(*idx)] += alpha1*(*v);
2385: y[18*(*idx)+1] += alpha2*(*v);
2386: y[18*(*idx)+2] += alpha3*(*v);
2387: y[18*(*idx)+3] += alpha4*(*v);
2388: y[18*(*idx)+4] += alpha5*(*v);
2389: y[18*(*idx)+5] += alpha6*(*v);
2390: y[18*(*idx)+6] += alpha7*(*v);
2391: y[18*(*idx)+7] += alpha8*(*v);
2392: y[18*(*idx)+8] += alpha9*(*v);
2393: y[18*(*idx)+9] += alpha10*(*v);
2394: y[18*(*idx)+10] += alpha11*(*v);
2395: y[18*(*idx)+11] += alpha12*(*v);
2396: y[18*(*idx)+12] += alpha13*(*v);
2397: y[18*(*idx)+13] += alpha14*(*v);
2398: y[18*(*idx)+14] += alpha15*(*v);
2399: y[18*(*idx)+15] += alpha16*(*v);
2400: y[18*(*idx)+16] += alpha17*(*v);
2401: y[18*(*idx)+17] += alpha18*(*v);
2402: idx++; v++;
2403: }
2404: }
2405: PetscLogFlops(36.0*a->nz);
2406: VecRestoreArrayRead(xx,&x);
2407: VecRestoreArray(yy,&y);
2408: return 0;
2409: }
2411: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2412: {
2413: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2414: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2415: const PetscScalar *x,*v;
2416: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2417: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2418: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2419: PetscInt n,i,jrow,j;
2421: if (yy != zz) VecCopy(yy,zz);
2422: VecGetArrayRead(xx,&x);
2423: VecGetArray(zz,&y);
2424: idx = a->j;
2425: v = a->a;
2426: ii = a->i;
2428: for (i=0; i<m; i++) {
2429: jrow = ii[i];
2430: n = ii[i+1] - jrow;
2431: sum1 = 0.0;
2432: sum2 = 0.0;
2433: sum3 = 0.0;
2434: sum4 = 0.0;
2435: sum5 = 0.0;
2436: sum6 = 0.0;
2437: sum7 = 0.0;
2438: sum8 = 0.0;
2439: sum9 = 0.0;
2440: sum10 = 0.0;
2441: sum11 = 0.0;
2442: sum12 = 0.0;
2443: sum13 = 0.0;
2444: sum14 = 0.0;
2445: sum15 = 0.0;
2446: sum16 = 0.0;
2447: sum17 = 0.0;
2448: sum18 = 0.0;
2449: for (j=0; j<n; j++) {
2450: sum1 += v[jrow]*x[18*idx[jrow]];
2451: sum2 += v[jrow]*x[18*idx[jrow]+1];
2452: sum3 += v[jrow]*x[18*idx[jrow]+2];
2453: sum4 += v[jrow]*x[18*idx[jrow]+3];
2454: sum5 += v[jrow]*x[18*idx[jrow]+4];
2455: sum6 += v[jrow]*x[18*idx[jrow]+5];
2456: sum7 += v[jrow]*x[18*idx[jrow]+6];
2457: sum8 += v[jrow]*x[18*idx[jrow]+7];
2458: sum9 += v[jrow]*x[18*idx[jrow]+8];
2459: sum10 += v[jrow]*x[18*idx[jrow]+9];
2460: sum11 += v[jrow]*x[18*idx[jrow]+10];
2461: sum12 += v[jrow]*x[18*idx[jrow]+11];
2462: sum13 += v[jrow]*x[18*idx[jrow]+12];
2463: sum14 += v[jrow]*x[18*idx[jrow]+13];
2464: sum15 += v[jrow]*x[18*idx[jrow]+14];
2465: sum16 += v[jrow]*x[18*idx[jrow]+15];
2466: sum17 += v[jrow]*x[18*idx[jrow]+16];
2467: sum18 += v[jrow]*x[18*idx[jrow]+17];
2468: jrow++;
2469: }
2470: y[18*i] += sum1;
2471: y[18*i+1] += sum2;
2472: y[18*i+2] += sum3;
2473: y[18*i+3] += sum4;
2474: y[18*i+4] += sum5;
2475: y[18*i+5] += sum6;
2476: y[18*i+6] += sum7;
2477: y[18*i+7] += sum8;
2478: y[18*i+8] += sum9;
2479: y[18*i+9] += sum10;
2480: y[18*i+10] += sum11;
2481: y[18*i+11] += sum12;
2482: y[18*i+12] += sum13;
2483: y[18*i+13] += sum14;
2484: y[18*i+14] += sum15;
2485: y[18*i+15] += sum16;
2486: y[18*i+16] += sum17;
2487: y[18*i+17] += sum18;
2488: }
2490: PetscLogFlops(36.0*a->nz);
2491: VecRestoreArrayRead(xx,&x);
2492: VecRestoreArray(zz,&y);
2493: return 0;
2494: }
2496: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2497: {
2498: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2499: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2500: const PetscScalar *x,*v;
2501: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2502: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2503: const PetscInt m = b->AIJ->rmap->n,*idx;
2504: PetscInt n,i;
2506: if (yy != zz) VecCopy(yy,zz);
2507: VecGetArrayRead(xx,&x);
2508: VecGetArray(zz,&y);
2509: for (i=0; i<m; i++) {
2510: idx = a->j + a->i[i];
2511: v = a->a + a->i[i];
2512: n = a->i[i+1] - a->i[i];
2513: alpha1 = x[18*i];
2514: alpha2 = x[18*i+1];
2515: alpha3 = x[18*i+2];
2516: alpha4 = x[18*i+3];
2517: alpha5 = x[18*i+4];
2518: alpha6 = x[18*i+5];
2519: alpha7 = x[18*i+6];
2520: alpha8 = x[18*i+7];
2521: alpha9 = x[18*i+8];
2522: alpha10 = x[18*i+9];
2523: alpha11 = x[18*i+10];
2524: alpha12 = x[18*i+11];
2525: alpha13 = x[18*i+12];
2526: alpha14 = x[18*i+13];
2527: alpha15 = x[18*i+14];
2528: alpha16 = x[18*i+15];
2529: alpha17 = x[18*i+16];
2530: alpha18 = x[18*i+17];
2531: while (n-->0) {
2532: y[18*(*idx)] += alpha1*(*v);
2533: y[18*(*idx)+1] += alpha2*(*v);
2534: y[18*(*idx)+2] += alpha3*(*v);
2535: y[18*(*idx)+3] += alpha4*(*v);
2536: y[18*(*idx)+4] += alpha5*(*v);
2537: y[18*(*idx)+5] += alpha6*(*v);
2538: y[18*(*idx)+6] += alpha7*(*v);
2539: y[18*(*idx)+7] += alpha8*(*v);
2540: y[18*(*idx)+8] += alpha9*(*v);
2541: y[18*(*idx)+9] += alpha10*(*v);
2542: y[18*(*idx)+10] += alpha11*(*v);
2543: y[18*(*idx)+11] += alpha12*(*v);
2544: y[18*(*idx)+12] += alpha13*(*v);
2545: y[18*(*idx)+13] += alpha14*(*v);
2546: y[18*(*idx)+14] += alpha15*(*v);
2547: y[18*(*idx)+15] += alpha16*(*v);
2548: y[18*(*idx)+16] += alpha17*(*v);
2549: y[18*(*idx)+17] += alpha18*(*v);
2550: idx++; v++;
2551: }
2552: }
2553: PetscLogFlops(36.0*a->nz);
2554: VecRestoreArrayRead(xx,&x);
2555: VecRestoreArray(zz,&y);
2556: return 0;
2557: }
2559: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2560: {
2561: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2562: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2563: const PetscScalar *x,*v;
2564: PetscScalar *y,*sums;
2565: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2566: PetscInt n,i,jrow,j,dof = b->dof,k;
2568: VecGetArrayRead(xx,&x);
2569: VecSet(yy,0.0);
2570: VecGetArray(yy,&y);
2571: idx = a->j;
2572: v = a->a;
2573: ii = a->i;
2575: for (i=0; i<m; i++) {
2576: jrow = ii[i];
2577: n = ii[i+1] - jrow;
2578: sums = y + dof*i;
2579: for (j=0; j<n; j++) {
2580: for (k=0; k<dof; k++) {
2581: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2582: }
2583: jrow++;
2584: }
2585: }
2587: PetscLogFlops(2.0*dof*a->nz);
2588: VecRestoreArrayRead(xx,&x);
2589: VecRestoreArray(yy,&y);
2590: return 0;
2591: }
2593: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2594: {
2595: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2596: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2597: const PetscScalar *x,*v;
2598: PetscScalar *y,*sums;
2599: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2600: PetscInt n,i,jrow,j,dof = b->dof,k;
2602: if (yy != zz) VecCopy(yy,zz);
2603: VecGetArrayRead(xx,&x);
2604: VecGetArray(zz,&y);
2605: idx = a->j;
2606: v = a->a;
2607: ii = a->i;
2609: for (i=0; i<m; i++) {
2610: jrow = ii[i];
2611: n = ii[i+1] - jrow;
2612: sums = y + dof*i;
2613: for (j=0; j<n; j++) {
2614: for (k=0; k<dof; k++) {
2615: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2616: }
2617: jrow++;
2618: }
2619: }
2621: PetscLogFlops(2.0*dof*a->nz);
2622: VecRestoreArrayRead(xx,&x);
2623: VecRestoreArray(zz,&y);
2624: return 0;
2625: }
2627: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2628: {
2629: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2630: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2631: const PetscScalar *x,*v,*alpha;
2632: PetscScalar *y;
2633: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2634: PetscInt n,i,k;
2636: VecGetArrayRead(xx,&x);
2637: VecSet(yy,0.0);
2638: VecGetArray(yy,&y);
2639: for (i=0; i<m; i++) {
2640: idx = a->j + a->i[i];
2641: v = a->a + a->i[i];
2642: n = a->i[i+1] - a->i[i];
2643: alpha = x + dof*i;
2644: while (n-->0) {
2645: for (k=0; k<dof; k++) {
2646: y[dof*(*idx)+k] += alpha[k]*(*v);
2647: }
2648: idx++; v++;
2649: }
2650: }
2651: PetscLogFlops(2.0*dof*a->nz);
2652: VecRestoreArrayRead(xx,&x);
2653: VecRestoreArray(yy,&y);
2654: return 0;
2655: }
2657: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2658: {
2659: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2660: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2661: const PetscScalar *x,*v,*alpha;
2662: PetscScalar *y;
2663: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2664: PetscInt n,i,k;
2666: if (yy != zz) VecCopy(yy,zz);
2667: VecGetArrayRead(xx,&x);
2668: VecGetArray(zz,&y);
2669: for (i=0; i<m; i++) {
2670: idx = a->j + a->i[i];
2671: v = a->a + a->i[i];
2672: n = a->i[i+1] - a->i[i];
2673: alpha = x + dof*i;
2674: while (n-->0) {
2675: for (k=0; k<dof; k++) {
2676: y[dof*(*idx)+k] += alpha[k]*(*v);
2677: }
2678: idx++; v++;
2679: }
2680: }
2681: PetscLogFlops(2.0*dof*a->nz);
2682: VecRestoreArrayRead(xx,&x);
2683: VecRestoreArray(zz,&y);
2684: return 0;
2685: }
2687: /*===================================================================================*/
2688: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2689: {
2690: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2692: /* start the scatter */
2693: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2694: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2695: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2696: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2697: return 0;
2698: }
2700: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2701: {
2702: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2704: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2705: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2706: VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2707: VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2708: return 0;
2709: }
2711: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2712: {
2713: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2715: /* start the scatter */
2716: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2717: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2718: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2719: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2720: return 0;
2721: }
2723: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2724: {
2725: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2727: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2728: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2729: VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2730: VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2731: return 0;
2732: }
2734: /* ----------------------------------------------------------------*/
2735: PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
2736: {
2737: Mat_Product *product = C->product;
2739: if (product->type == MATPRODUCT_PtAP) {
2740: C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2741: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]);
2742: return 0;
2743: }
2745: PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
2746: {
2748: Mat_Product *product = C->product;
2749: PetscBool flg = PETSC_FALSE;
2750: Mat A=product->A,P=product->B;
2751: PetscInt alg=1; /* set default algorithm */
2752: #if !defined(PETSC_HAVE_HYPRE)
2753: const char *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"};
2754: PetscInt nalg=4;
2755: #else
2756: const char *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"};
2757: PetscInt nalg=5;
2758: #endif
2762: /* PtAP */
2763: /* Check matrix local sizes */
2767: /* Set the default algorithm */
2768: PetscStrcmp(C->product->alg,"default",&flg);
2769: if (flg) {
2770: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2771: }
2773: /* Get runtime option */
2774: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
2775: PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg);
2776: if (flg) {
2777: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2778: }
2779: PetscOptionsEnd();
2781: PetscStrcmp(C->product->alg,"allatonce",&flg);
2782: if (flg) {
2783: C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2784: return 0;
2785: }
2787: PetscStrcmp(C->product->alg,"allatonce_merged",&flg);
2788: if (flg) {
2789: C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2790: return 0;
2791: }
2793: /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2794: PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n");
2795: MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);
2796: MatProductSetFromOptions(C);
2797: return 0;
2798: }
2800: /* ----------------------------------------------------------------*/
2801: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)
2802: {
2803: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2804: Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data;
2805: Mat P =pp->AIJ;
2806: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2807: PetscInt *pti,*ptj,*ptJ;
2808: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2809: const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2810: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2811: MatScalar *ca;
2812: const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2814: /* Get ij structure of P^T */
2815: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2817: cn = pn*ppdof;
2818: /* Allocate ci array, arrays for fill computation and */
2819: /* free space for accumulating nonzero column info */
2820: PetscMalloc1(cn+1,&ci);
2821: ci[0] = 0;
2823: /* Work arrays for rows of P^T*A */
2824: PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);
2825: PetscArrayzero(ptadenserow,an);
2826: PetscArrayzero(denserow,cn);
2828: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2829: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2830: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2831: PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);
2832: current_space = free_space;
2834: /* Determine symbolic info for each row of C: */
2835: for (i=0; i<pn; i++) {
2836: ptnzi = pti[i+1] - pti[i];
2837: ptJ = ptj + pti[i];
2838: for (dof=0; dof<ppdof; dof++) {
2839: ptanzi = 0;
2840: /* Determine symbolic row of PtA: */
2841: for (j=0; j<ptnzi; j++) {
2842: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2843: arow = ptJ[j]*ppdof + dof;
2844: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2845: anzj = ai[arow+1] - ai[arow];
2846: ajj = aj + ai[arow];
2847: for (k=0; k<anzj; k++) {
2848: if (!ptadenserow[ajj[k]]) {
2849: ptadenserow[ajj[k]] = -1;
2850: ptasparserow[ptanzi++] = ajj[k];
2851: }
2852: }
2853: }
2854: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2855: ptaj = ptasparserow;
2856: cnzi = 0;
2857: for (j=0; j<ptanzi; j++) {
2858: /* Get offset within block of P */
2859: pshift = *ptaj%ppdof;
2860: /* Get block row of P */
2861: prow = (*ptaj++)/ppdof; /* integer division */
2862: /* P has same number of nonzeros per row as the compressed form */
2863: pnzj = pi[prow+1] - pi[prow];
2864: pjj = pj + pi[prow];
2865: for (k=0;k<pnzj;k++) {
2866: /* Locations in C are shifted by the offset within the block */
2867: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2868: if (!denserow[pjj[k]*ppdof+pshift]) {
2869: denserow[pjj[k]*ppdof+pshift] = -1;
2870: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
2871: }
2872: }
2873: }
2875: /* sort sparserow */
2876: PetscSortInt(cnzi,sparserow);
2878: /* If free space is not available, make more free space */
2879: /* Double the amount of total space in the list */
2880: if (current_space->local_remaining<cnzi) {
2881: PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);
2882: }
2884: /* Copy data into free space, and zero out denserows */
2885: PetscArraycpy(current_space->array,sparserow,cnzi);
2887: current_space->array += cnzi;
2888: current_space->local_used += cnzi;
2889: current_space->local_remaining -= cnzi;
2891: for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
2892: for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
2894: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2895: /* For now, we will recompute what is needed. */
2896: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2897: }
2898: }
2899: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2900: /* Allocate space for cj, initialize cj, and */
2901: /* destroy list of free space and other temporary array(s) */
2902: PetscMalloc1(ci[cn]+1,&cj);
2903: PetscFreeSpaceContiguous(&free_space,cj);
2904: PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);
2906: /* Allocate space for ca */
2907: PetscCalloc1(ci[cn]+1,&ca);
2909: /* put together the new matrix */
2910: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C);
2911: MatSetBlockSize(C,pp->dof);
2913: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2914: /* Since these are PETSc arrays, change flags to free them as necessary. */
2915: c = (Mat_SeqAIJ*)(C->data);
2916: c->free_a = PETSC_TRUE;
2917: c->free_ij = PETSC_TRUE;
2918: c->nonew = 0;
2920: C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2921: C->ops->productnumeric = MatProductNumeric_PtAP;
2923: /* Clean up. */
2924: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2925: return 0;
2926: }
2928: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2929: {
2930: /* This routine requires testing -- first draft only */
2931: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2932: Mat P =pp->AIJ;
2933: Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data;
2934: Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data;
2935: Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data;
2936: const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
2937: const PetscInt *ci=c->i,*cj=c->j,*cjj;
2938: const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
2939: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
2940: const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
2941: MatScalar *ca=c->a,*caj,*apa;
2943: /* Allocate temporary array for storage of one row of A*P */
2944: PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);
2946: /* Clear old values in C */
2947: PetscArrayzero(ca,ci[cm]);
2949: for (i=0; i<am; i++) {
2950: /* Form sparse row of A*P */
2951: anzi = ai[i+1] - ai[i];
2952: apnzj = 0;
2953: for (j=0; j<anzi; j++) {
2954: /* Get offset within block of P */
2955: pshift = *aj%ppdof;
2956: /* Get block row of P */
2957: prow = *aj++/ppdof; /* integer division */
2958: pnzj = pi[prow+1] - pi[prow];
2959: pjj = pj + pi[prow];
2960: paj = pa + pi[prow];
2961: for (k=0; k<pnzj; k++) {
2962: poffset = pjj[k]*ppdof+pshift;
2963: if (!apjdense[poffset]) {
2964: apjdense[poffset] = -1;
2965: apj[apnzj++] = poffset;
2966: }
2967: apa[poffset] += (*aa)*paj[k];
2968: }
2969: PetscLogFlops(2.0*pnzj);
2970: aa++;
2971: }
2973: /* Sort the j index array for quick sparse axpy. */
2974: /* Note: a array does not need sorting as it is in dense storage locations. */
2975: PetscSortInt(apnzj,apj);
2977: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2978: prow = i/ppdof; /* integer division */
2979: pshift = i%ppdof;
2980: poffset = pi[prow];
2981: pnzi = pi[prow+1] - poffset;
2982: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2983: pJ = pj+poffset;
2984: pA = pa+poffset;
2985: for (j=0; j<pnzi; j++) {
2986: crow = (*pJ)*ppdof+pshift;
2987: cjj = cj + ci[crow];
2988: caj = ca + ci[crow];
2989: pJ++;
2990: /* Perform sparse axpy operation. Note cjj includes apj. */
2991: for (k=0,nextap=0; nextap<apnzj; k++) {
2992: if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
2993: }
2994: PetscLogFlops(2.0*apnzj);
2995: pA++;
2996: }
2998: /* Zero the current row info for A*P */
2999: for (j=0; j<apnzj; j++) {
3000: apa[apj[j]] = 0.;
3001: apjdense[apj[j]] = 0;
3002: }
3003: }
3005: /* Assemble the final matrix and clean up */
3006: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3007: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3008: PetscFree3(apa,apj,apjdense);
3009: return 0;
3010: }
3012: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
3013: {
3014: Mat_Product *product = C->product;
3015: Mat A=product->A,P=product->B;
3017: MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C);
3018: return 0;
3019: }
3021: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3022: {
3023: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3024: }
3026: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3027: {
3028: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3029: }
3031: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);
3033: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3034: {
3035: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3038: MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);
3039: return 0;
3040: }
3042: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat);
3044: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)
3045: {
3046: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3048: MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);
3049: C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3050: return 0;
3051: }
3053: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);
3055: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3056: {
3057: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3060: MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);
3061: return 0;
3062: }
3064: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat);
3066: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)
3067: {
3068: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3071: MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);
3072: C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3073: return 0;
3074: }
3076: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3077: {
3078: Mat_Product *product = C->product;
3079: Mat A=product->A,P=product->B;
3080: PetscBool flg;
3082: PetscStrcmp(product->alg,"allatonce",&flg);
3083: if (flg) {
3084: MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C);
3085: C->ops->productnumeric = MatProductNumeric_PtAP;
3086: return 0;
3087: }
3089: PetscStrcmp(product->alg,"allatonce_merged",&flg);
3090: if (flg) {
3091: MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C);
3092: C->ops->productnumeric = MatProductNumeric_PtAP;
3093: return 0;
3094: }
3096: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
3097: }
3099: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3100: {
3101: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
3102: Mat a = b->AIJ,B;
3103: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
3104: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3105: PetscInt *cols;
3106: PetscScalar *vals;
3108: MatGetSize(a,&m,&n);
3109: PetscMalloc1(dof*m,&ilen);
3110: for (i=0; i<m; i++) {
3111: nmax = PetscMax(nmax,aij->ilen[i]);
3112: for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3113: }
3114: MatCreate(PETSC_COMM_SELF,&B);
3115: MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);
3116: MatSetType(B,newtype);
3117: MatSeqAIJSetPreallocation(B,0,ilen);
3118: PetscFree(ilen);
3119: PetscMalloc1(nmax,&icols);
3120: ii = 0;
3121: for (i=0; i<m; i++) {
3122: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3123: for (j=0; j<dof; j++) {
3124: for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3125: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3126: ii++;
3127: }
3128: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3129: }
3130: PetscFree(icols);
3131: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3132: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3134: if (reuse == MAT_INPLACE_MATRIX) {
3135: MatHeaderReplace(A,&B);
3136: } else {
3137: *newmat = B;
3138: }
3139: return 0;
3140: }
3142: #include <../src/mat/impls/aij/mpi/mpiaij.h>
3144: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3145: {
3146: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
3147: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3148: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3149: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3150: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3151: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3152: PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3153: PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3154: PetscInt rstart,cstart,*garray,ii,k;
3155: PetscScalar *vals,*ovals;
3157: PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);
3158: for (i=0; i<A->rmap->n/dof; i++) {
3159: nmax = PetscMax(nmax,AIJ->ilen[i]);
3160: onmax = PetscMax(onmax,OAIJ->ilen[i]);
3161: for (j=0; j<dof; j++) {
3162: dnz[dof*i+j] = AIJ->ilen[i];
3163: onz[dof*i+j] = OAIJ->ilen[i];
3164: }
3165: }
3166: MatCreate(PetscObjectComm((PetscObject)A),&B);
3167: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3168: MatSetType(B,newtype);
3169: MatMPIAIJSetPreallocation(B,0,dnz,0,onz);
3170: MatSetBlockSize(B,dof);
3171: PetscFree2(dnz,onz);
3173: PetscMalloc2(nmax,&icols,onmax,&oicols);
3174: rstart = dof*maij->A->rmap->rstart;
3175: cstart = dof*maij->A->cmap->rstart;
3176: garray = mpiaij->garray;
3178: ii = rstart;
3179: for (i=0; i<A->rmap->n/dof; i++) {
3180: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3181: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3182: for (j=0; j<dof; j++) {
3183: for (k=0; k<ncols; k++) {
3184: icols[k] = cstart + dof*cols[k]+j;
3185: }
3186: for (k=0; k<oncols; k++) {
3187: oicols[k] = dof*garray[ocols[k]]+j;
3188: }
3189: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3190: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3191: ii++;
3192: }
3193: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3194: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3195: }
3196: PetscFree2(icols,oicols);
3198: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3199: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3201: if (reuse == MAT_INPLACE_MATRIX) {
3202: PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3203: ((PetscObject)A)->refct = 1;
3205: MatHeaderReplace(A,&B);
3207: ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3208: } else {
3209: *newmat = B;
3210: }
3211: return 0;
3212: }
3214: PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3215: {
3216: Mat A;
3218: MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3219: MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
3220: MatDestroy(&A);
3221: return 0;
3222: }
3224: PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3225: {
3226: Mat A;
3228: MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3229: MatCreateSubMatrices(A,n,irow,icol,scall,submat);
3230: MatDestroy(&A);
3231: return 0;
3232: }
3234: /* ---------------------------------------------------------------------------------- */
3235: /*@
3236: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3237: operations for multicomponent problems. It interpolates each component the same
3238: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
3239: and MATMPIAIJ for distributed matrices.
3241: Collective
3243: Input Parameters:
3244: + A - the AIJ matrix describing the action on blocks
3245: - dof - the block size (number of components per node)
3247: Output Parameter:
3248: . maij - the new MAIJ matrix
3250: Operations provided:
3251: .vb
3252: MatMult
3253: MatMultTranspose
3254: MatMultAdd
3255: MatMultTransposeAdd
3256: MatView
3257: .ve
3259: Level: advanced
3261: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3262: @*/
3263: PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3264: {
3265: PetscMPIInt size;
3266: PetscInt n;
3267: Mat B;
3268: #if defined(PETSC_HAVE_CUDA)
3269: /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3270: PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3271: #endif
3273: dof = PetscAbs(dof);
3274: PetscObjectReference((PetscObject)A);
3276: if (dof == 1) *maij = A;
3277: else {
3278: MatCreate(PetscObjectComm((PetscObject)A),&B);
3279: /* propagate vec type */
3280: MatSetVecType(B,A->defaultvectype);
3281: MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3282: PetscLayoutSetBlockSize(B->rmap,dof);
3283: PetscLayoutSetBlockSize(B->cmap,dof);
3284: PetscLayoutSetUp(B->rmap);
3285: PetscLayoutSetUp(B->cmap);
3287: B->assembled = PETSC_TRUE;
3289: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3290: if (size == 1) {
3291: Mat_SeqMAIJ *b;
3293: MatSetType(B,MATSEQMAIJ);
3295: B->ops->setup = NULL;
3296: B->ops->destroy = MatDestroy_SeqMAIJ;
3297: B->ops->view = MatView_SeqMAIJ;
3299: b = (Mat_SeqMAIJ*)B->data;
3300: b->dof = dof;
3301: b->AIJ = A;
3303: if (dof == 2) {
3304: B->ops->mult = MatMult_SeqMAIJ_2;
3305: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
3306: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
3307: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3308: } else if (dof == 3) {
3309: B->ops->mult = MatMult_SeqMAIJ_3;
3310: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
3311: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
3312: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3313: } else if (dof == 4) {
3314: B->ops->mult = MatMult_SeqMAIJ_4;
3315: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
3316: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
3317: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3318: } else if (dof == 5) {
3319: B->ops->mult = MatMult_SeqMAIJ_5;
3320: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
3321: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
3322: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3323: } else if (dof == 6) {
3324: B->ops->mult = MatMult_SeqMAIJ_6;
3325: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
3326: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
3327: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3328: } else if (dof == 7) {
3329: B->ops->mult = MatMult_SeqMAIJ_7;
3330: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
3331: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
3332: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3333: } else if (dof == 8) {
3334: B->ops->mult = MatMult_SeqMAIJ_8;
3335: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
3336: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
3337: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3338: } else if (dof == 9) {
3339: B->ops->mult = MatMult_SeqMAIJ_9;
3340: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
3341: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
3342: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3343: } else if (dof == 10) {
3344: B->ops->mult = MatMult_SeqMAIJ_10;
3345: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
3346: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
3347: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3348: } else if (dof == 11) {
3349: B->ops->mult = MatMult_SeqMAIJ_11;
3350: B->ops->multadd = MatMultAdd_SeqMAIJ_11;
3351: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11;
3352: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3353: } else if (dof == 16) {
3354: B->ops->mult = MatMult_SeqMAIJ_16;
3355: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
3356: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
3357: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3358: } else if (dof == 18) {
3359: B->ops->mult = MatMult_SeqMAIJ_18;
3360: B->ops->multadd = MatMultAdd_SeqMAIJ_18;
3361: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18;
3362: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3363: } else {
3364: B->ops->mult = MatMult_SeqMAIJ_N;
3365: B->ops->multadd = MatMultAdd_SeqMAIJ_N;
3366: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N;
3367: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3368: }
3369: #if defined(PETSC_HAVE_CUDA)
3370: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);
3371: #endif
3372: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);
3373: PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ);
3374: } else {
3375: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data;
3376: Mat_MPIMAIJ *b;
3377: IS from,to;
3378: Vec gvec;
3380: MatSetType(B,MATMPIMAIJ);
3382: B->ops->setup = NULL;
3383: B->ops->destroy = MatDestroy_MPIMAIJ;
3384: B->ops->view = MatView_MPIMAIJ;
3386: b = (Mat_MPIMAIJ*)B->data;
3387: b->dof = dof;
3388: b->A = A;
3390: MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);
3391: MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);
3393: VecGetSize(mpiaij->lvec,&n);
3394: VecCreate(PETSC_COMM_SELF,&b->w);
3395: VecSetSizes(b->w,n*dof,n*dof);
3396: VecSetBlockSize(b->w,dof);
3397: VecSetType(b->w,VECSEQ);
3399: /* create two temporary Index sets for build scatter gather */
3400: ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3401: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
3403: /* create temporary global vector to generate scatter context */
3404: VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);
3406: /* generate the scatter context */
3407: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
3409: ISDestroy(&from);
3410: ISDestroy(&to);
3411: VecDestroy(&gvec);
3413: B->ops->mult = MatMult_MPIMAIJ_dof;
3414: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
3415: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
3416: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3418: #if defined(PETSC_HAVE_CUDA)
3419: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);
3420: #endif
3421: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);
3422: PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ);
3423: }
3424: B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ;
3425: B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3426: MatSetUp(B);
3427: #if defined(PETSC_HAVE_CUDA)
3428: /* temporary until we have CUDA implementation of MAIJ */
3429: {
3430: PetscBool flg;
3431: if (convert) {
3432: PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");
3433: if (flg) {
3434: MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);
3435: }
3436: }
3437: }
3438: #endif
3439: *maij = B;
3440: }
3441: return 0;
3442: }