Actual source code: maij.c
petsc-3.8.4 2018-03-24
2: /*
3: Defines the basic matrix operations for the MAIJ matrix storage format.
4: This format is used for restriction and interpolation operations for
5: multicomponent problems. It interpolates each component the same way
6: independently.
8: We provide:
9: MatMult()
10: MatMultTranspose()
11: MatMultTransposeAdd()
12: MatMultAdd()
13: and
14: MatCreateMAIJ(Mat,dof,Mat*)
16: This single directory handles both the sequential and parallel codes
17: */
19: #include <../src/mat/impls/maij/maij.h>
20: #include <../src/mat/utils/freespace.h>
22: /*@
23: MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
25: Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
27: Input Parameter:
28: . A - the MAIJ matrix
30: Output Parameter:
31: . B - the AIJ matrix
33: Level: advanced
35: Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
37: .seealso: MatCreateMAIJ()
38: @*/
39: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
40: {
42: PetscBool ismpimaij,isseqmaij;
45: PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
46: PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
47: if (ismpimaij) {
48: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
50: *B = b->A;
51: } else if (isseqmaij) {
52: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
54: *B = b->AIJ;
55: } else {
56: *B = A;
57: }
58: return(0);
59: }
61: /*@
62: MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
64: Logically Collective
66: Input Parameter:
67: + A - the MAIJ matrix
68: - dof - the block size for the new matrix
70: Output Parameter:
71: . B - the new MAIJ matrix
73: Level: advanced
75: .seealso: MatCreateMAIJ()
76: @*/
77: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
78: {
80: Mat Aij = NULL;
84: MatMAIJGetAIJ(A,&Aij);
85: MatCreateMAIJ(Aij,dof,B);
86: return(0);
87: }
89: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
90: {
92: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
95: MatDestroy(&b->AIJ);
96: PetscFree(A->data);
97: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);
98: PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);
99: PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijperm_seqmaij_C",NULL);
100: PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijmkl_seqmaij_C",NULL);
101: return(0);
102: }
104: PetscErrorCode MatSetUp_MAIJ(Mat A)
105: {
107: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
108: return(0);
109: }
111: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
112: {
114: Mat B;
117: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
118: MatView(B,viewer);
119: MatDestroy(&B);
120: return(0);
121: }
123: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
124: {
126: Mat B;
129: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
130: MatView(B,viewer);
131: MatDestroy(&B);
132: return(0);
133: }
135: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
136: {
138: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
141: MatDestroy(&b->AIJ);
142: MatDestroy(&b->OAIJ);
143: MatDestroy(&b->A);
144: VecScatterDestroy(&b->ctx);
145: VecDestroy(&b->w);
146: PetscFree(A->data);
147: PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);
148: PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);
149: PetscObjectChangeTypeName((PetscObject)A,0);
150: return(0);
151: }
153: /*MC
154: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
155: multicomponent problems, interpolating or restricting each component the same way independently.
156: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
158: Operations provided:
159: . MatMult
160: . MatMultTranspose
161: . MatMultAdd
162: . MatMultTransposeAdd
164: Level: advanced
166: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
167: M*/
169: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
170: {
172: Mat_MPIMAIJ *b;
173: PetscMPIInt size;
176: PetscNewLog(A,&b);
177: A->data = (void*)b;
179: PetscMemzero(A->ops,sizeof(struct _MatOps));
181: A->ops->setup = MatSetUp_MAIJ;
183: b->AIJ = 0;
184: b->dof = 0;
185: b->OAIJ = 0;
186: b->ctx = 0;
187: b->w = 0;
188: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
189: if (size == 1) {
190: PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
191: } else {
192: PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
193: }
194: A->preallocated = PETSC_TRUE;
195: A->assembled = PETSC_TRUE;
196: return(0);
197: }
199: /* --------------------------------------------------------------------------------------*/
200: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
201: {
202: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
203: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
204: const PetscScalar *x,*v;
205: PetscScalar *y, sum1, sum2;
206: PetscErrorCode ierr;
207: PetscInt nonzerorow=0,n,i,jrow,j;
208: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
211: VecGetArrayRead(xx,&x);
212: VecGetArray(yy,&y);
213: idx = a->j;
214: v = a->a;
215: ii = a->i;
217: for (i=0; i<m; i++) {
218: jrow = ii[i];
219: n = ii[i+1] - jrow;
220: sum1 = 0.0;
221: sum2 = 0.0;
223: nonzerorow += (n>0);
224: for (j=0; j<n; j++) {
225: sum1 += v[jrow]*x[2*idx[jrow]];
226: sum2 += v[jrow]*x[2*idx[jrow]+1];
227: jrow++;
228: }
229: y[2*i] = sum1;
230: y[2*i+1] = sum2;
231: }
233: PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
234: VecRestoreArrayRead(xx,&x);
235: VecRestoreArray(yy,&y);
236: return(0);
237: }
239: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
240: {
241: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
242: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
243: const PetscScalar *x,*v;
244: PetscScalar *y,alpha1,alpha2;
245: PetscErrorCode ierr;
246: PetscInt n,i;
247: const PetscInt m = b->AIJ->rmap->n,*idx;
250: VecSet(yy,0.0);
251: VecGetArrayRead(xx,&x);
252: VecGetArray(yy,&y);
254: for (i=0; i<m; i++) {
255: idx = a->j + a->i[i];
256: v = a->a + a->i[i];
257: n = a->i[i+1] - a->i[i];
258: alpha1 = x[2*i];
259: alpha2 = x[2*i+1];
260: while (n-->0) {
261: y[2*(*idx)] += alpha1*(*v);
262: y[2*(*idx)+1] += alpha2*(*v);
263: idx++; v++;
264: }
265: }
266: PetscLogFlops(4.0*a->nz);
267: VecRestoreArrayRead(xx,&x);
268: VecRestoreArray(yy,&y);
269: return(0);
270: }
272: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
273: {
274: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
275: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
276: const PetscScalar *x,*v;
277: PetscScalar *y,sum1, sum2;
278: PetscErrorCode ierr;
279: PetscInt n,i,jrow,j;
280: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
283: if (yy != zz) {VecCopy(yy,zz);}
284: VecGetArrayRead(xx,&x);
285: VecGetArray(zz,&y);
286: idx = a->j;
287: v = a->a;
288: ii = a->i;
290: for (i=0; i<m; i++) {
291: jrow = ii[i];
292: n = ii[i+1] - jrow;
293: sum1 = 0.0;
294: sum2 = 0.0;
295: for (j=0; j<n; j++) {
296: sum1 += v[jrow]*x[2*idx[jrow]];
297: sum2 += v[jrow]*x[2*idx[jrow]+1];
298: jrow++;
299: }
300: y[2*i] += sum1;
301: y[2*i+1] += sum2;
302: }
304: PetscLogFlops(4.0*a->nz);
305: VecRestoreArrayRead(xx,&x);
306: VecRestoreArray(zz,&y);
307: return(0);
308: }
309: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
310: {
311: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
312: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
313: const PetscScalar *x,*v;
314: PetscScalar *y,alpha1,alpha2;
315: PetscErrorCode ierr;
316: PetscInt n,i;
317: const PetscInt m = b->AIJ->rmap->n,*idx;
320: if (yy != zz) {VecCopy(yy,zz);}
321: VecGetArrayRead(xx,&x);
322: VecGetArray(zz,&y);
324: for (i=0; i<m; i++) {
325: idx = a->j + a->i[i];
326: v = a->a + a->i[i];
327: n = a->i[i+1] - a->i[i];
328: alpha1 = x[2*i];
329: alpha2 = x[2*i+1];
330: while (n-->0) {
331: y[2*(*idx)] += alpha1*(*v);
332: y[2*(*idx)+1] += alpha2*(*v);
333: idx++; v++;
334: }
335: }
336: PetscLogFlops(4.0*a->nz);
337: VecRestoreArrayRead(xx,&x);
338: VecRestoreArray(zz,&y);
339: return(0);
340: }
341: /* --------------------------------------------------------------------------------------*/
342: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
343: {
344: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
345: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
346: const PetscScalar *x,*v;
347: PetscScalar *y,sum1, sum2, sum3;
348: PetscErrorCode ierr;
349: PetscInt nonzerorow=0,n,i,jrow,j;
350: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
353: VecGetArrayRead(xx,&x);
354: VecGetArray(yy,&y);
355: idx = a->j;
356: v = a->a;
357: ii = a->i;
359: for (i=0; i<m; i++) {
360: jrow = ii[i];
361: n = ii[i+1] - jrow;
362: sum1 = 0.0;
363: sum2 = 0.0;
364: sum3 = 0.0;
366: nonzerorow += (n>0);
367: for (j=0; j<n; j++) {
368: sum1 += v[jrow]*x[3*idx[jrow]];
369: sum2 += v[jrow]*x[3*idx[jrow]+1];
370: sum3 += v[jrow]*x[3*idx[jrow]+2];
371: jrow++;
372: }
373: y[3*i] = sum1;
374: y[3*i+1] = sum2;
375: y[3*i+2] = sum3;
376: }
378: PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
379: VecRestoreArrayRead(xx,&x);
380: VecRestoreArray(yy,&y);
381: return(0);
382: }
384: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
385: {
386: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
387: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
388: const PetscScalar *x,*v;
389: PetscScalar *y,alpha1,alpha2,alpha3;
390: PetscErrorCode ierr;
391: PetscInt n,i;
392: const PetscInt m = b->AIJ->rmap->n,*idx;
395: VecSet(yy,0.0);
396: VecGetArrayRead(xx,&x);
397: VecGetArray(yy,&y);
399: for (i=0; i<m; i++) {
400: idx = a->j + a->i[i];
401: v = a->a + a->i[i];
402: n = a->i[i+1] - a->i[i];
403: alpha1 = x[3*i];
404: alpha2 = x[3*i+1];
405: alpha3 = x[3*i+2];
406: while (n-->0) {
407: y[3*(*idx)] += alpha1*(*v);
408: y[3*(*idx)+1] += alpha2*(*v);
409: y[3*(*idx)+2] += alpha3*(*v);
410: idx++; v++;
411: }
412: }
413: PetscLogFlops(6.0*a->nz);
414: VecRestoreArrayRead(xx,&x);
415: VecRestoreArray(yy,&y);
416: return(0);
417: }
419: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
420: {
421: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
422: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
423: const PetscScalar *x,*v;
424: PetscScalar *y,sum1, sum2, sum3;
425: PetscErrorCode ierr;
426: PetscInt n,i,jrow,j;
427: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
430: if (yy != zz) {VecCopy(yy,zz);}
431: VecGetArrayRead(xx,&x);
432: VecGetArray(zz,&y);
433: idx = a->j;
434: v = a->a;
435: ii = a->i;
437: for (i=0; i<m; i++) {
438: jrow = ii[i];
439: n = ii[i+1] - jrow;
440: sum1 = 0.0;
441: sum2 = 0.0;
442: sum3 = 0.0;
443: for (j=0; j<n; j++) {
444: sum1 += v[jrow]*x[3*idx[jrow]];
445: sum2 += v[jrow]*x[3*idx[jrow]+1];
446: sum3 += v[jrow]*x[3*idx[jrow]+2];
447: jrow++;
448: }
449: y[3*i] += sum1;
450: y[3*i+1] += sum2;
451: y[3*i+2] += sum3;
452: }
454: PetscLogFlops(6.0*a->nz);
455: VecRestoreArrayRead(xx,&x);
456: VecRestoreArray(zz,&y);
457: return(0);
458: }
459: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
460: {
461: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
462: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
463: const PetscScalar *x,*v;
464: PetscScalar *y,alpha1,alpha2,alpha3;
465: PetscErrorCode ierr;
466: PetscInt n,i;
467: const PetscInt m = b->AIJ->rmap->n,*idx;
470: if (yy != zz) {VecCopy(yy,zz);}
471: VecGetArrayRead(xx,&x);
472: VecGetArray(zz,&y);
473: for (i=0; i<m; i++) {
474: idx = a->j + a->i[i];
475: v = a->a + a->i[i];
476: n = a->i[i+1] - a->i[i];
477: alpha1 = x[3*i];
478: alpha2 = x[3*i+1];
479: alpha3 = x[3*i+2];
480: while (n-->0) {
481: y[3*(*idx)] += alpha1*(*v);
482: y[3*(*idx)+1] += alpha2*(*v);
483: y[3*(*idx)+2] += alpha3*(*v);
484: idx++; v++;
485: }
486: }
487: PetscLogFlops(6.0*a->nz);
488: VecRestoreArrayRead(xx,&x);
489: VecRestoreArray(zz,&y);
490: return(0);
491: }
493: /* ------------------------------------------------------------------------------*/
494: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
495: {
496: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
497: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
498: const PetscScalar *x,*v;
499: PetscScalar *y,sum1, sum2, sum3, sum4;
500: PetscErrorCode ierr;
501: PetscInt nonzerorow=0,n,i,jrow,j;
502: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
505: VecGetArrayRead(xx,&x);
506: VecGetArray(yy,&y);
507: idx = a->j;
508: v = a->a;
509: ii = a->i;
511: for (i=0; i<m; i++) {
512: jrow = ii[i];
513: n = ii[i+1] - jrow;
514: sum1 = 0.0;
515: sum2 = 0.0;
516: sum3 = 0.0;
517: sum4 = 0.0;
518: nonzerorow += (n>0);
519: for (j=0; j<n; j++) {
520: sum1 += v[jrow]*x[4*idx[jrow]];
521: sum2 += v[jrow]*x[4*idx[jrow]+1];
522: sum3 += v[jrow]*x[4*idx[jrow]+2];
523: sum4 += v[jrow]*x[4*idx[jrow]+3];
524: jrow++;
525: }
526: y[4*i] = sum1;
527: y[4*i+1] = sum2;
528: y[4*i+2] = sum3;
529: y[4*i+3] = sum4;
530: }
532: PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
533: VecRestoreArrayRead(xx,&x);
534: VecRestoreArray(yy,&y);
535: return(0);
536: }
538: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
539: {
540: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
541: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
542: const PetscScalar *x,*v;
543: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
544: PetscErrorCode ierr;
545: PetscInt n,i;
546: const PetscInt m = b->AIJ->rmap->n,*idx;
549: VecSet(yy,0.0);
550: VecGetArrayRead(xx,&x);
551: VecGetArray(yy,&y);
552: for (i=0; i<m; i++) {
553: idx = a->j + a->i[i];
554: v = a->a + a->i[i];
555: n = a->i[i+1] - a->i[i];
556: alpha1 = x[4*i];
557: alpha2 = x[4*i+1];
558: alpha3 = x[4*i+2];
559: alpha4 = x[4*i+3];
560: while (n-->0) {
561: y[4*(*idx)] += alpha1*(*v);
562: y[4*(*idx)+1] += alpha2*(*v);
563: y[4*(*idx)+2] += alpha3*(*v);
564: y[4*(*idx)+3] += alpha4*(*v);
565: idx++; v++;
566: }
567: }
568: PetscLogFlops(8.0*a->nz);
569: VecRestoreArrayRead(xx,&x);
570: VecRestoreArray(yy,&y);
571: return(0);
572: }
574: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
575: {
576: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
577: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
578: const PetscScalar *x,*v;
579: PetscScalar *y,sum1, sum2, sum3, sum4;
580: PetscErrorCode ierr;
581: PetscInt n,i,jrow,j;
582: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
585: if (yy != zz) {VecCopy(yy,zz);}
586: VecGetArrayRead(xx,&x);
587: VecGetArray(zz,&y);
588: idx = a->j;
589: v = a->a;
590: ii = a->i;
592: for (i=0; i<m; i++) {
593: jrow = ii[i];
594: n = ii[i+1] - jrow;
595: sum1 = 0.0;
596: sum2 = 0.0;
597: sum3 = 0.0;
598: sum4 = 0.0;
599: for (j=0; j<n; j++) {
600: sum1 += v[jrow]*x[4*idx[jrow]];
601: sum2 += v[jrow]*x[4*idx[jrow]+1];
602: sum3 += v[jrow]*x[4*idx[jrow]+2];
603: sum4 += v[jrow]*x[4*idx[jrow]+3];
604: jrow++;
605: }
606: y[4*i] += sum1;
607: y[4*i+1] += sum2;
608: y[4*i+2] += sum3;
609: y[4*i+3] += sum4;
610: }
612: PetscLogFlops(8.0*a->nz);
613: VecRestoreArrayRead(xx,&x);
614: VecRestoreArray(zz,&y);
615: return(0);
616: }
617: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
618: {
619: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
620: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
621: const PetscScalar *x,*v;
622: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
623: PetscErrorCode ierr;
624: PetscInt n,i;
625: const PetscInt m = b->AIJ->rmap->n,*idx;
628: if (yy != zz) {VecCopy(yy,zz);}
629: VecGetArrayRead(xx,&x);
630: VecGetArray(zz,&y);
632: for (i=0; i<m; i++) {
633: idx = a->j + a->i[i];
634: v = a->a + a->i[i];
635: n = a->i[i+1] - a->i[i];
636: alpha1 = x[4*i];
637: alpha2 = x[4*i+1];
638: alpha3 = x[4*i+2];
639: alpha4 = x[4*i+3];
640: while (n-->0) {
641: y[4*(*idx)] += alpha1*(*v);
642: y[4*(*idx)+1] += alpha2*(*v);
643: y[4*(*idx)+2] += alpha3*(*v);
644: y[4*(*idx)+3] += alpha4*(*v);
645: idx++; v++;
646: }
647: }
648: PetscLogFlops(8.0*a->nz);
649: VecRestoreArrayRead(xx,&x);
650: VecRestoreArray(zz,&y);
651: return(0);
652: }
653: /* ------------------------------------------------------------------------------*/
655: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
656: {
657: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
658: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
659: const PetscScalar *x,*v;
660: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
661: PetscErrorCode ierr;
662: PetscInt nonzerorow=0,n,i,jrow,j;
663: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
666: VecGetArrayRead(xx,&x);
667: VecGetArray(yy,&y);
668: idx = a->j;
669: v = a->a;
670: ii = a->i;
672: for (i=0; i<m; i++) {
673: jrow = ii[i];
674: n = ii[i+1] - jrow;
675: sum1 = 0.0;
676: sum2 = 0.0;
677: sum3 = 0.0;
678: sum4 = 0.0;
679: sum5 = 0.0;
681: nonzerorow += (n>0);
682: for (j=0; j<n; j++) {
683: sum1 += v[jrow]*x[5*idx[jrow]];
684: sum2 += v[jrow]*x[5*idx[jrow]+1];
685: sum3 += v[jrow]*x[5*idx[jrow]+2];
686: sum4 += v[jrow]*x[5*idx[jrow]+3];
687: sum5 += v[jrow]*x[5*idx[jrow]+4];
688: jrow++;
689: }
690: y[5*i] = sum1;
691: y[5*i+1] = sum2;
692: y[5*i+2] = sum3;
693: y[5*i+3] = sum4;
694: y[5*i+4] = sum5;
695: }
697: PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
698: VecRestoreArrayRead(xx,&x);
699: VecRestoreArray(yy,&y);
700: return(0);
701: }
703: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
704: {
705: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
706: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
707: const PetscScalar *x,*v;
708: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
709: PetscErrorCode ierr;
710: PetscInt n,i;
711: const PetscInt m = b->AIJ->rmap->n,*idx;
714: VecSet(yy,0.0);
715: VecGetArrayRead(xx,&x);
716: VecGetArray(yy,&y);
718: for (i=0; i<m; i++) {
719: idx = a->j + a->i[i];
720: v = a->a + a->i[i];
721: n = a->i[i+1] - a->i[i];
722: alpha1 = x[5*i];
723: alpha2 = x[5*i+1];
724: alpha3 = x[5*i+2];
725: alpha4 = x[5*i+3];
726: alpha5 = x[5*i+4];
727: while (n-->0) {
728: y[5*(*idx)] += alpha1*(*v);
729: y[5*(*idx)+1] += alpha2*(*v);
730: y[5*(*idx)+2] += alpha3*(*v);
731: y[5*(*idx)+3] += alpha4*(*v);
732: y[5*(*idx)+4] += alpha5*(*v);
733: idx++; v++;
734: }
735: }
736: PetscLogFlops(10.0*a->nz);
737: VecRestoreArrayRead(xx,&x);
738: VecRestoreArray(yy,&y);
739: return(0);
740: }
742: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
743: {
744: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
745: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
746: const PetscScalar *x,*v;
747: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
748: PetscErrorCode ierr;
749: PetscInt n,i,jrow,j;
750: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
753: if (yy != zz) {VecCopy(yy,zz);}
754: VecGetArrayRead(xx,&x);
755: VecGetArray(zz,&y);
756: idx = a->j;
757: v = a->a;
758: ii = a->i;
760: for (i=0; i<m; i++) {
761: jrow = ii[i];
762: n = ii[i+1] - jrow;
763: sum1 = 0.0;
764: sum2 = 0.0;
765: sum3 = 0.0;
766: sum4 = 0.0;
767: sum5 = 0.0;
768: for (j=0; j<n; j++) {
769: sum1 += v[jrow]*x[5*idx[jrow]];
770: sum2 += v[jrow]*x[5*idx[jrow]+1];
771: sum3 += v[jrow]*x[5*idx[jrow]+2];
772: sum4 += v[jrow]*x[5*idx[jrow]+3];
773: sum5 += v[jrow]*x[5*idx[jrow]+4];
774: jrow++;
775: }
776: y[5*i] += sum1;
777: y[5*i+1] += sum2;
778: y[5*i+2] += sum3;
779: y[5*i+3] += sum4;
780: y[5*i+4] += sum5;
781: }
783: PetscLogFlops(10.0*a->nz);
784: VecRestoreArrayRead(xx,&x);
785: VecRestoreArray(zz,&y);
786: return(0);
787: }
789: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
790: {
791: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
792: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
793: const PetscScalar *x,*v;
794: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
795: PetscErrorCode ierr;
796: PetscInt n,i;
797: const PetscInt m = b->AIJ->rmap->n,*idx;
800: if (yy != zz) {VecCopy(yy,zz);}
801: VecGetArrayRead(xx,&x);
802: VecGetArray(zz,&y);
804: for (i=0; i<m; i++) {
805: idx = a->j + a->i[i];
806: v = a->a + a->i[i];
807: n = a->i[i+1] - a->i[i];
808: alpha1 = x[5*i];
809: alpha2 = x[5*i+1];
810: alpha3 = x[5*i+2];
811: alpha4 = x[5*i+3];
812: alpha5 = x[5*i+4];
813: while (n-->0) {
814: y[5*(*idx)] += alpha1*(*v);
815: y[5*(*idx)+1] += alpha2*(*v);
816: y[5*(*idx)+2] += alpha3*(*v);
817: y[5*(*idx)+3] += alpha4*(*v);
818: y[5*(*idx)+4] += alpha5*(*v);
819: idx++; v++;
820: }
821: }
822: PetscLogFlops(10.0*a->nz);
823: VecRestoreArrayRead(xx,&x);
824: VecRestoreArray(zz,&y);
825: return(0);
826: }
828: /* ------------------------------------------------------------------------------*/
829: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
830: {
831: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
832: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
833: const PetscScalar *x,*v;
834: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
835: PetscErrorCode ierr;
836: PetscInt nonzerorow=0,n,i,jrow,j;
837: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
840: VecGetArrayRead(xx,&x);
841: VecGetArray(yy,&y);
842: idx = a->j;
843: v = a->a;
844: ii = a->i;
846: for (i=0; i<m; i++) {
847: jrow = ii[i];
848: n = ii[i+1] - jrow;
849: sum1 = 0.0;
850: sum2 = 0.0;
851: sum3 = 0.0;
852: sum4 = 0.0;
853: sum5 = 0.0;
854: sum6 = 0.0;
856: nonzerorow += (n>0);
857: for (j=0; j<n; j++) {
858: sum1 += v[jrow]*x[6*idx[jrow]];
859: sum2 += v[jrow]*x[6*idx[jrow]+1];
860: sum3 += v[jrow]*x[6*idx[jrow]+2];
861: sum4 += v[jrow]*x[6*idx[jrow]+3];
862: sum5 += v[jrow]*x[6*idx[jrow]+4];
863: sum6 += v[jrow]*x[6*idx[jrow]+5];
864: jrow++;
865: }
866: y[6*i] = sum1;
867: y[6*i+1] = sum2;
868: y[6*i+2] = sum3;
869: y[6*i+3] = sum4;
870: y[6*i+4] = sum5;
871: y[6*i+5] = sum6;
872: }
874: PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
875: VecRestoreArrayRead(xx,&x);
876: VecRestoreArray(yy,&y);
877: return(0);
878: }
880: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
881: {
882: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
883: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
884: const PetscScalar *x,*v;
885: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
886: PetscErrorCode ierr;
887: PetscInt n,i;
888: const PetscInt m = b->AIJ->rmap->n,*idx;
891: VecSet(yy,0.0);
892: VecGetArrayRead(xx,&x);
893: VecGetArray(yy,&y);
895: for (i=0; i<m; i++) {
896: idx = a->j + a->i[i];
897: v = a->a + a->i[i];
898: n = a->i[i+1] - a->i[i];
899: alpha1 = x[6*i];
900: alpha2 = x[6*i+1];
901: alpha3 = x[6*i+2];
902: alpha4 = x[6*i+3];
903: alpha5 = x[6*i+4];
904: alpha6 = x[6*i+5];
905: while (n-->0) {
906: y[6*(*idx)] += alpha1*(*v);
907: y[6*(*idx)+1] += alpha2*(*v);
908: y[6*(*idx)+2] += alpha3*(*v);
909: y[6*(*idx)+3] += alpha4*(*v);
910: y[6*(*idx)+4] += alpha5*(*v);
911: y[6*(*idx)+5] += alpha6*(*v);
912: idx++; v++;
913: }
914: }
915: PetscLogFlops(12.0*a->nz);
916: VecRestoreArrayRead(xx,&x);
917: VecRestoreArray(yy,&y);
918: return(0);
919: }
921: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
922: {
923: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
924: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
925: const PetscScalar *x,*v;
926: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
927: PetscErrorCode ierr;
928: PetscInt n,i,jrow,j;
929: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
932: if (yy != zz) {VecCopy(yy,zz);}
933: VecGetArrayRead(xx,&x);
934: VecGetArray(zz,&y);
935: idx = a->j;
936: v = a->a;
937: ii = a->i;
939: for (i=0; i<m; i++) {
940: jrow = ii[i];
941: n = ii[i+1] - jrow;
942: sum1 = 0.0;
943: sum2 = 0.0;
944: sum3 = 0.0;
945: sum4 = 0.0;
946: sum5 = 0.0;
947: sum6 = 0.0;
948: for (j=0; j<n; j++) {
949: sum1 += v[jrow]*x[6*idx[jrow]];
950: sum2 += v[jrow]*x[6*idx[jrow]+1];
951: sum3 += v[jrow]*x[6*idx[jrow]+2];
952: sum4 += v[jrow]*x[6*idx[jrow]+3];
953: sum5 += v[jrow]*x[6*idx[jrow]+4];
954: sum6 += v[jrow]*x[6*idx[jrow]+5];
955: jrow++;
956: }
957: y[6*i] += sum1;
958: y[6*i+1] += sum2;
959: y[6*i+2] += sum3;
960: y[6*i+3] += sum4;
961: y[6*i+4] += sum5;
962: y[6*i+5] += sum6;
963: }
965: PetscLogFlops(12.0*a->nz);
966: VecRestoreArrayRead(xx,&x);
967: VecRestoreArray(zz,&y);
968: return(0);
969: }
971: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
972: {
973: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
974: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
975: const PetscScalar *x,*v;
976: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
977: PetscErrorCode ierr;
978: PetscInt n,i;
979: const PetscInt m = b->AIJ->rmap->n,*idx;
982: if (yy != zz) {VecCopy(yy,zz);}
983: VecGetArrayRead(xx,&x);
984: VecGetArray(zz,&y);
986: for (i=0; i<m; i++) {
987: idx = a->j + a->i[i];
988: v = a->a + a->i[i];
989: n = a->i[i+1] - a->i[i];
990: alpha1 = x[6*i];
991: alpha2 = x[6*i+1];
992: alpha3 = x[6*i+2];
993: alpha4 = x[6*i+3];
994: alpha5 = x[6*i+4];
995: alpha6 = x[6*i+5];
996: while (n-->0) {
997: y[6*(*idx)] += alpha1*(*v);
998: y[6*(*idx)+1] += alpha2*(*v);
999: y[6*(*idx)+2] += alpha3*(*v);
1000: y[6*(*idx)+3] += alpha4*(*v);
1001: y[6*(*idx)+4] += alpha5*(*v);
1002: y[6*(*idx)+5] += alpha6*(*v);
1003: idx++; v++;
1004: }
1005: }
1006: PetscLogFlops(12.0*a->nz);
1007: VecRestoreArrayRead(xx,&x);
1008: VecRestoreArray(zz,&y);
1009: return(0);
1010: }
1012: /* ------------------------------------------------------------------------------*/
1013: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1014: {
1015: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1016: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1017: const PetscScalar *x,*v;
1018: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1019: PetscErrorCode ierr;
1020: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1021: PetscInt nonzerorow=0,n,i,jrow,j;
1024: VecGetArrayRead(xx,&x);
1025: VecGetArray(yy,&y);
1026: idx = a->j;
1027: v = a->a;
1028: ii = a->i;
1030: for (i=0; i<m; i++) {
1031: jrow = ii[i];
1032: n = ii[i+1] - jrow;
1033: sum1 = 0.0;
1034: sum2 = 0.0;
1035: sum3 = 0.0;
1036: sum4 = 0.0;
1037: sum5 = 0.0;
1038: sum6 = 0.0;
1039: sum7 = 0.0;
1041: nonzerorow += (n>0);
1042: for (j=0; j<n; j++) {
1043: sum1 += v[jrow]*x[7*idx[jrow]];
1044: sum2 += v[jrow]*x[7*idx[jrow]+1];
1045: sum3 += v[jrow]*x[7*idx[jrow]+2];
1046: sum4 += v[jrow]*x[7*idx[jrow]+3];
1047: sum5 += v[jrow]*x[7*idx[jrow]+4];
1048: sum6 += v[jrow]*x[7*idx[jrow]+5];
1049: sum7 += v[jrow]*x[7*idx[jrow]+6];
1050: jrow++;
1051: }
1052: y[7*i] = sum1;
1053: y[7*i+1] = sum2;
1054: y[7*i+2] = sum3;
1055: y[7*i+3] = sum4;
1056: y[7*i+4] = sum5;
1057: y[7*i+5] = sum6;
1058: y[7*i+6] = sum7;
1059: }
1061: PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1062: VecRestoreArrayRead(xx,&x);
1063: VecRestoreArray(yy,&y);
1064: return(0);
1065: }
1067: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1068: {
1069: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1070: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1071: const PetscScalar *x,*v;
1072: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1073: PetscErrorCode ierr;
1074: const PetscInt m = b->AIJ->rmap->n,*idx;
1075: PetscInt n,i;
1078: VecSet(yy,0.0);
1079: VecGetArrayRead(xx,&x);
1080: VecGetArray(yy,&y);
1082: for (i=0; i<m; i++) {
1083: idx = a->j + a->i[i];
1084: v = a->a + a->i[i];
1085: n = a->i[i+1] - a->i[i];
1086: alpha1 = x[7*i];
1087: alpha2 = x[7*i+1];
1088: alpha3 = x[7*i+2];
1089: alpha4 = x[7*i+3];
1090: alpha5 = x[7*i+4];
1091: alpha6 = x[7*i+5];
1092: alpha7 = x[7*i+6];
1093: while (n-->0) {
1094: y[7*(*idx)] += alpha1*(*v);
1095: y[7*(*idx)+1] += alpha2*(*v);
1096: y[7*(*idx)+2] += alpha3*(*v);
1097: y[7*(*idx)+3] += alpha4*(*v);
1098: y[7*(*idx)+4] += alpha5*(*v);
1099: y[7*(*idx)+5] += alpha6*(*v);
1100: y[7*(*idx)+6] += alpha7*(*v);
1101: idx++; v++;
1102: }
1103: }
1104: PetscLogFlops(14.0*a->nz);
1105: VecRestoreArrayRead(xx,&x);
1106: VecRestoreArray(yy,&y);
1107: return(0);
1108: }
1110: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1111: {
1112: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1113: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1114: const PetscScalar *x,*v;
1115: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1116: PetscErrorCode ierr;
1117: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1118: PetscInt n,i,jrow,j;
1121: if (yy != zz) {VecCopy(yy,zz);}
1122: VecGetArrayRead(xx,&x);
1123: VecGetArray(zz,&y);
1124: idx = a->j;
1125: v = a->a;
1126: ii = a->i;
1128: for (i=0; i<m; i++) {
1129: jrow = ii[i];
1130: n = ii[i+1] - jrow;
1131: sum1 = 0.0;
1132: sum2 = 0.0;
1133: sum3 = 0.0;
1134: sum4 = 0.0;
1135: sum5 = 0.0;
1136: sum6 = 0.0;
1137: sum7 = 0.0;
1138: for (j=0; j<n; j++) {
1139: sum1 += v[jrow]*x[7*idx[jrow]];
1140: sum2 += v[jrow]*x[7*idx[jrow]+1];
1141: sum3 += v[jrow]*x[7*idx[jrow]+2];
1142: sum4 += v[jrow]*x[7*idx[jrow]+3];
1143: sum5 += v[jrow]*x[7*idx[jrow]+4];
1144: sum6 += v[jrow]*x[7*idx[jrow]+5];
1145: sum7 += v[jrow]*x[7*idx[jrow]+6];
1146: jrow++;
1147: }
1148: y[7*i] += sum1;
1149: y[7*i+1] += sum2;
1150: y[7*i+2] += sum3;
1151: y[7*i+3] += sum4;
1152: y[7*i+4] += sum5;
1153: y[7*i+5] += sum6;
1154: y[7*i+6] += sum7;
1155: }
1157: PetscLogFlops(14.0*a->nz);
1158: VecRestoreArrayRead(xx,&x);
1159: VecRestoreArray(zz,&y);
1160: return(0);
1161: }
1163: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1164: {
1165: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1166: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1167: const PetscScalar *x,*v;
1168: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1169: PetscErrorCode ierr;
1170: const PetscInt m = b->AIJ->rmap->n,*idx;
1171: PetscInt n,i;
1174: if (yy != zz) {VecCopy(yy,zz);}
1175: VecGetArrayRead(xx,&x);
1176: VecGetArray(zz,&y);
1177: for (i=0; i<m; i++) {
1178: idx = a->j + a->i[i];
1179: v = a->a + a->i[i];
1180: n = a->i[i+1] - a->i[i];
1181: alpha1 = x[7*i];
1182: alpha2 = x[7*i+1];
1183: alpha3 = x[7*i+2];
1184: alpha4 = x[7*i+3];
1185: alpha5 = x[7*i+4];
1186: alpha6 = x[7*i+5];
1187: alpha7 = x[7*i+6];
1188: while (n-->0) {
1189: y[7*(*idx)] += alpha1*(*v);
1190: y[7*(*idx)+1] += alpha2*(*v);
1191: y[7*(*idx)+2] += alpha3*(*v);
1192: y[7*(*idx)+3] += alpha4*(*v);
1193: y[7*(*idx)+4] += alpha5*(*v);
1194: y[7*(*idx)+5] += alpha6*(*v);
1195: y[7*(*idx)+6] += alpha7*(*v);
1196: idx++; v++;
1197: }
1198: }
1199: PetscLogFlops(14.0*a->nz);
1200: VecRestoreArrayRead(xx,&x);
1201: VecRestoreArray(zz,&y);
1202: return(0);
1203: }
1205: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1206: {
1207: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1208: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1209: const PetscScalar *x,*v;
1210: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1211: PetscErrorCode ierr;
1212: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1213: PetscInt nonzerorow=0,n,i,jrow,j;
1216: VecGetArrayRead(xx,&x);
1217: VecGetArray(yy,&y);
1218: idx = a->j;
1219: v = a->a;
1220: ii = a->i;
1222: for (i=0; i<m; i++) {
1223: jrow = ii[i];
1224: n = ii[i+1] - jrow;
1225: sum1 = 0.0;
1226: sum2 = 0.0;
1227: sum3 = 0.0;
1228: sum4 = 0.0;
1229: sum5 = 0.0;
1230: sum6 = 0.0;
1231: sum7 = 0.0;
1232: sum8 = 0.0;
1234: nonzerorow += (n>0);
1235: for (j=0; j<n; j++) {
1236: sum1 += v[jrow]*x[8*idx[jrow]];
1237: sum2 += v[jrow]*x[8*idx[jrow]+1];
1238: sum3 += v[jrow]*x[8*idx[jrow]+2];
1239: sum4 += v[jrow]*x[8*idx[jrow]+3];
1240: sum5 += v[jrow]*x[8*idx[jrow]+4];
1241: sum6 += v[jrow]*x[8*idx[jrow]+5];
1242: sum7 += v[jrow]*x[8*idx[jrow]+6];
1243: sum8 += v[jrow]*x[8*idx[jrow]+7];
1244: jrow++;
1245: }
1246: y[8*i] = sum1;
1247: y[8*i+1] = sum2;
1248: y[8*i+2] = sum3;
1249: y[8*i+3] = sum4;
1250: y[8*i+4] = sum5;
1251: y[8*i+5] = sum6;
1252: y[8*i+6] = sum7;
1253: y[8*i+7] = sum8;
1254: }
1256: PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1257: VecRestoreArrayRead(xx,&x);
1258: VecRestoreArray(yy,&y);
1259: return(0);
1260: }
1262: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1263: {
1264: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1265: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1266: const PetscScalar *x,*v;
1267: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1268: PetscErrorCode ierr;
1269: const PetscInt m = b->AIJ->rmap->n,*idx;
1270: PetscInt n,i;
1273: VecSet(yy,0.0);
1274: VecGetArrayRead(xx,&x);
1275: VecGetArray(yy,&y);
1277: for (i=0; i<m; i++) {
1278: idx = a->j + a->i[i];
1279: v = a->a + a->i[i];
1280: n = a->i[i+1] - a->i[i];
1281: alpha1 = x[8*i];
1282: alpha2 = x[8*i+1];
1283: alpha3 = x[8*i+2];
1284: alpha4 = x[8*i+3];
1285: alpha5 = x[8*i+4];
1286: alpha6 = x[8*i+5];
1287: alpha7 = x[8*i+6];
1288: alpha8 = x[8*i+7];
1289: while (n-->0) {
1290: y[8*(*idx)] += alpha1*(*v);
1291: y[8*(*idx)+1] += alpha2*(*v);
1292: y[8*(*idx)+2] += alpha3*(*v);
1293: y[8*(*idx)+3] += alpha4*(*v);
1294: y[8*(*idx)+4] += alpha5*(*v);
1295: y[8*(*idx)+5] += alpha6*(*v);
1296: y[8*(*idx)+6] += alpha7*(*v);
1297: y[8*(*idx)+7] += alpha8*(*v);
1298: idx++; v++;
1299: }
1300: }
1301: PetscLogFlops(16.0*a->nz);
1302: VecRestoreArrayRead(xx,&x);
1303: VecRestoreArray(yy,&y);
1304: return(0);
1305: }
1307: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1308: {
1309: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1310: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1311: const PetscScalar *x,*v;
1312: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1313: PetscErrorCode ierr;
1314: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1315: PetscInt n,i,jrow,j;
1318: if (yy != zz) {VecCopy(yy,zz);}
1319: VecGetArrayRead(xx,&x);
1320: VecGetArray(zz,&y);
1321: idx = a->j;
1322: v = a->a;
1323: ii = a->i;
1325: for (i=0; i<m; i++) {
1326: jrow = ii[i];
1327: n = ii[i+1] - jrow;
1328: sum1 = 0.0;
1329: sum2 = 0.0;
1330: sum3 = 0.0;
1331: sum4 = 0.0;
1332: sum5 = 0.0;
1333: sum6 = 0.0;
1334: sum7 = 0.0;
1335: sum8 = 0.0;
1336: for (j=0; j<n; j++) {
1337: sum1 += v[jrow]*x[8*idx[jrow]];
1338: sum2 += v[jrow]*x[8*idx[jrow]+1];
1339: sum3 += v[jrow]*x[8*idx[jrow]+2];
1340: sum4 += v[jrow]*x[8*idx[jrow]+3];
1341: sum5 += v[jrow]*x[8*idx[jrow]+4];
1342: sum6 += v[jrow]*x[8*idx[jrow]+5];
1343: sum7 += v[jrow]*x[8*idx[jrow]+6];
1344: sum8 += v[jrow]*x[8*idx[jrow]+7];
1345: jrow++;
1346: }
1347: y[8*i] += sum1;
1348: y[8*i+1] += sum2;
1349: y[8*i+2] += sum3;
1350: y[8*i+3] += sum4;
1351: y[8*i+4] += sum5;
1352: y[8*i+5] += sum6;
1353: y[8*i+6] += sum7;
1354: y[8*i+7] += sum8;
1355: }
1357: PetscLogFlops(16.0*a->nz);
1358: VecRestoreArrayRead(xx,&x);
1359: VecRestoreArray(zz,&y);
1360: return(0);
1361: }
1363: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1364: {
1365: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1366: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1367: const PetscScalar *x,*v;
1368: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1369: PetscErrorCode ierr;
1370: const PetscInt m = b->AIJ->rmap->n,*idx;
1371: PetscInt n,i;
1374: if (yy != zz) {VecCopy(yy,zz);}
1375: VecGetArrayRead(xx,&x);
1376: VecGetArray(zz,&y);
1377: for (i=0; i<m; i++) {
1378: idx = a->j + a->i[i];
1379: v = a->a + a->i[i];
1380: n = a->i[i+1] - a->i[i];
1381: alpha1 = x[8*i];
1382: alpha2 = x[8*i+1];
1383: alpha3 = x[8*i+2];
1384: alpha4 = x[8*i+3];
1385: alpha5 = x[8*i+4];
1386: alpha6 = x[8*i+5];
1387: alpha7 = x[8*i+6];
1388: alpha8 = x[8*i+7];
1389: while (n-->0) {
1390: y[8*(*idx)] += alpha1*(*v);
1391: y[8*(*idx)+1] += alpha2*(*v);
1392: y[8*(*idx)+2] += alpha3*(*v);
1393: y[8*(*idx)+3] += alpha4*(*v);
1394: y[8*(*idx)+4] += alpha5*(*v);
1395: y[8*(*idx)+5] += alpha6*(*v);
1396: y[8*(*idx)+6] += alpha7*(*v);
1397: y[8*(*idx)+7] += alpha8*(*v);
1398: idx++; v++;
1399: }
1400: }
1401: PetscLogFlops(16.0*a->nz);
1402: VecRestoreArrayRead(xx,&x);
1403: VecRestoreArray(zz,&y);
1404: return(0);
1405: }
1407: /* ------------------------------------------------------------------------------*/
1408: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1409: {
1410: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1411: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1412: const PetscScalar *x,*v;
1413: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1414: PetscErrorCode ierr;
1415: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1416: PetscInt nonzerorow=0,n,i,jrow,j;
1419: VecGetArrayRead(xx,&x);
1420: VecGetArray(yy,&y);
1421: idx = a->j;
1422: v = a->a;
1423: ii = a->i;
1425: for (i=0; i<m; i++) {
1426: jrow = ii[i];
1427: n = ii[i+1] - jrow;
1428: sum1 = 0.0;
1429: sum2 = 0.0;
1430: sum3 = 0.0;
1431: sum4 = 0.0;
1432: sum5 = 0.0;
1433: sum6 = 0.0;
1434: sum7 = 0.0;
1435: sum8 = 0.0;
1436: sum9 = 0.0;
1438: nonzerorow += (n>0);
1439: for (j=0; j<n; j++) {
1440: sum1 += v[jrow]*x[9*idx[jrow]];
1441: sum2 += v[jrow]*x[9*idx[jrow]+1];
1442: sum3 += v[jrow]*x[9*idx[jrow]+2];
1443: sum4 += v[jrow]*x[9*idx[jrow]+3];
1444: sum5 += v[jrow]*x[9*idx[jrow]+4];
1445: sum6 += v[jrow]*x[9*idx[jrow]+5];
1446: sum7 += v[jrow]*x[9*idx[jrow]+6];
1447: sum8 += v[jrow]*x[9*idx[jrow]+7];
1448: sum9 += v[jrow]*x[9*idx[jrow]+8];
1449: jrow++;
1450: }
1451: y[9*i] = sum1;
1452: y[9*i+1] = sum2;
1453: y[9*i+2] = sum3;
1454: y[9*i+3] = sum4;
1455: y[9*i+4] = sum5;
1456: y[9*i+5] = sum6;
1457: y[9*i+6] = sum7;
1458: y[9*i+7] = sum8;
1459: y[9*i+8] = sum9;
1460: }
1462: PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1463: VecRestoreArrayRead(xx,&x);
1464: VecRestoreArray(yy,&y);
1465: return(0);
1466: }
1468: /* ------------------------------------------------------------------------------*/
1470: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1471: {
1472: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1473: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1474: const PetscScalar *x,*v;
1475: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1476: PetscErrorCode ierr;
1477: const PetscInt m = b->AIJ->rmap->n,*idx;
1478: PetscInt n,i;
1481: VecSet(yy,0.0);
1482: VecGetArrayRead(xx,&x);
1483: VecGetArray(yy,&y);
1485: for (i=0; i<m; i++) {
1486: idx = a->j + a->i[i];
1487: v = a->a + a->i[i];
1488: n = a->i[i+1] - a->i[i];
1489: alpha1 = x[9*i];
1490: alpha2 = x[9*i+1];
1491: alpha3 = x[9*i+2];
1492: alpha4 = x[9*i+3];
1493: alpha5 = x[9*i+4];
1494: alpha6 = x[9*i+5];
1495: alpha7 = x[9*i+6];
1496: alpha8 = x[9*i+7];
1497: alpha9 = x[9*i+8];
1498: while (n-->0) {
1499: y[9*(*idx)] += alpha1*(*v);
1500: y[9*(*idx)+1] += alpha2*(*v);
1501: y[9*(*idx)+2] += alpha3*(*v);
1502: y[9*(*idx)+3] += alpha4*(*v);
1503: y[9*(*idx)+4] += alpha5*(*v);
1504: y[9*(*idx)+5] += alpha6*(*v);
1505: y[9*(*idx)+6] += alpha7*(*v);
1506: y[9*(*idx)+7] += alpha8*(*v);
1507: y[9*(*idx)+8] += alpha9*(*v);
1508: idx++; v++;
1509: }
1510: }
1511: PetscLogFlops(18.0*a->nz);
1512: VecRestoreArrayRead(xx,&x);
1513: VecRestoreArray(yy,&y);
1514: return(0);
1515: }
1517: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1518: {
1519: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1520: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1521: const PetscScalar *x,*v;
1522: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1523: PetscErrorCode ierr;
1524: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1525: PetscInt n,i,jrow,j;
1528: if (yy != zz) {VecCopy(yy,zz);}
1529: VecGetArrayRead(xx,&x);
1530: VecGetArray(zz,&y);
1531: idx = a->j;
1532: v = a->a;
1533: ii = a->i;
1535: for (i=0; i<m; i++) {
1536: jrow = ii[i];
1537: n = ii[i+1] - jrow;
1538: sum1 = 0.0;
1539: sum2 = 0.0;
1540: sum3 = 0.0;
1541: sum4 = 0.0;
1542: sum5 = 0.0;
1543: sum6 = 0.0;
1544: sum7 = 0.0;
1545: sum8 = 0.0;
1546: sum9 = 0.0;
1547: for (j=0; j<n; j++) {
1548: sum1 += v[jrow]*x[9*idx[jrow]];
1549: sum2 += v[jrow]*x[9*idx[jrow]+1];
1550: sum3 += v[jrow]*x[9*idx[jrow]+2];
1551: sum4 += v[jrow]*x[9*idx[jrow]+3];
1552: sum5 += v[jrow]*x[9*idx[jrow]+4];
1553: sum6 += v[jrow]*x[9*idx[jrow]+5];
1554: sum7 += v[jrow]*x[9*idx[jrow]+6];
1555: sum8 += v[jrow]*x[9*idx[jrow]+7];
1556: sum9 += v[jrow]*x[9*idx[jrow]+8];
1557: jrow++;
1558: }
1559: y[9*i] += sum1;
1560: y[9*i+1] += sum2;
1561: y[9*i+2] += sum3;
1562: y[9*i+3] += sum4;
1563: y[9*i+4] += sum5;
1564: y[9*i+5] += sum6;
1565: y[9*i+6] += sum7;
1566: y[9*i+7] += sum8;
1567: y[9*i+8] += sum9;
1568: }
1570: PetscLogFlops(18*a->nz);
1571: VecRestoreArrayRead(xx,&x);
1572: VecRestoreArray(zz,&y);
1573: return(0);
1574: }
1576: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1577: {
1578: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1579: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1580: const PetscScalar *x,*v;
1581: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1582: PetscErrorCode ierr;
1583: const PetscInt m = b->AIJ->rmap->n,*idx;
1584: PetscInt n,i;
1587: if (yy != zz) {VecCopy(yy,zz);}
1588: VecGetArrayRead(xx,&x);
1589: VecGetArray(zz,&y);
1590: for (i=0; i<m; i++) {
1591: idx = a->j + a->i[i];
1592: v = a->a + a->i[i];
1593: n = a->i[i+1] - a->i[i];
1594: alpha1 = x[9*i];
1595: alpha2 = x[9*i+1];
1596: alpha3 = x[9*i+2];
1597: alpha4 = x[9*i+3];
1598: alpha5 = x[9*i+4];
1599: alpha6 = x[9*i+5];
1600: alpha7 = x[9*i+6];
1601: alpha8 = x[9*i+7];
1602: alpha9 = x[9*i+8];
1603: while (n-->0) {
1604: y[9*(*idx)] += alpha1*(*v);
1605: y[9*(*idx)+1] += alpha2*(*v);
1606: y[9*(*idx)+2] += alpha3*(*v);
1607: y[9*(*idx)+3] += alpha4*(*v);
1608: y[9*(*idx)+4] += alpha5*(*v);
1609: y[9*(*idx)+5] += alpha6*(*v);
1610: y[9*(*idx)+6] += alpha7*(*v);
1611: y[9*(*idx)+7] += alpha8*(*v);
1612: y[9*(*idx)+8] += alpha9*(*v);
1613: idx++; v++;
1614: }
1615: }
1616: PetscLogFlops(18.0*a->nz);
1617: VecRestoreArrayRead(xx,&x);
1618: VecRestoreArray(zz,&y);
1619: return(0);
1620: }
1621: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1622: {
1623: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1624: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1625: const PetscScalar *x,*v;
1626: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1627: PetscErrorCode ierr;
1628: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1629: PetscInt nonzerorow=0,n,i,jrow,j;
1632: VecGetArrayRead(xx,&x);
1633: VecGetArray(yy,&y);
1634: idx = a->j;
1635: v = a->a;
1636: ii = a->i;
1638: for (i=0; i<m; i++) {
1639: jrow = ii[i];
1640: n = ii[i+1] - jrow;
1641: sum1 = 0.0;
1642: sum2 = 0.0;
1643: sum3 = 0.0;
1644: sum4 = 0.0;
1645: sum5 = 0.0;
1646: sum6 = 0.0;
1647: sum7 = 0.0;
1648: sum8 = 0.0;
1649: sum9 = 0.0;
1650: sum10 = 0.0;
1652: nonzerorow += (n>0);
1653: for (j=0; j<n; j++) {
1654: sum1 += v[jrow]*x[10*idx[jrow]];
1655: sum2 += v[jrow]*x[10*idx[jrow]+1];
1656: sum3 += v[jrow]*x[10*idx[jrow]+2];
1657: sum4 += v[jrow]*x[10*idx[jrow]+3];
1658: sum5 += v[jrow]*x[10*idx[jrow]+4];
1659: sum6 += v[jrow]*x[10*idx[jrow]+5];
1660: sum7 += v[jrow]*x[10*idx[jrow]+6];
1661: sum8 += v[jrow]*x[10*idx[jrow]+7];
1662: sum9 += v[jrow]*x[10*idx[jrow]+8];
1663: sum10 += v[jrow]*x[10*idx[jrow]+9];
1664: jrow++;
1665: }
1666: y[10*i] = sum1;
1667: y[10*i+1] = sum2;
1668: y[10*i+2] = sum3;
1669: y[10*i+3] = sum4;
1670: y[10*i+4] = sum5;
1671: y[10*i+5] = sum6;
1672: y[10*i+6] = sum7;
1673: y[10*i+7] = sum8;
1674: y[10*i+8] = sum9;
1675: y[10*i+9] = sum10;
1676: }
1678: PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1679: VecRestoreArrayRead(xx,&x);
1680: VecRestoreArray(yy,&y);
1681: return(0);
1682: }
1684: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1685: {
1686: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1687: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1688: const PetscScalar *x,*v;
1689: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1690: PetscErrorCode ierr;
1691: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1692: PetscInt n,i,jrow,j;
1695: if (yy != zz) {VecCopy(yy,zz);}
1696: VecGetArrayRead(xx,&x);
1697: VecGetArray(zz,&y);
1698: idx = a->j;
1699: v = a->a;
1700: ii = a->i;
1702: for (i=0; i<m; i++) {
1703: jrow = ii[i];
1704: n = ii[i+1] - jrow;
1705: sum1 = 0.0;
1706: sum2 = 0.0;
1707: sum3 = 0.0;
1708: sum4 = 0.0;
1709: sum5 = 0.0;
1710: sum6 = 0.0;
1711: sum7 = 0.0;
1712: sum8 = 0.0;
1713: sum9 = 0.0;
1714: sum10 = 0.0;
1715: for (j=0; j<n; j++) {
1716: sum1 += v[jrow]*x[10*idx[jrow]];
1717: sum2 += v[jrow]*x[10*idx[jrow]+1];
1718: sum3 += v[jrow]*x[10*idx[jrow]+2];
1719: sum4 += v[jrow]*x[10*idx[jrow]+3];
1720: sum5 += v[jrow]*x[10*idx[jrow]+4];
1721: sum6 += v[jrow]*x[10*idx[jrow]+5];
1722: sum7 += v[jrow]*x[10*idx[jrow]+6];
1723: sum8 += v[jrow]*x[10*idx[jrow]+7];
1724: sum9 += v[jrow]*x[10*idx[jrow]+8];
1725: sum10 += v[jrow]*x[10*idx[jrow]+9];
1726: jrow++;
1727: }
1728: y[10*i] += sum1;
1729: y[10*i+1] += sum2;
1730: y[10*i+2] += sum3;
1731: y[10*i+3] += sum4;
1732: y[10*i+4] += sum5;
1733: y[10*i+5] += sum6;
1734: y[10*i+6] += sum7;
1735: y[10*i+7] += sum8;
1736: y[10*i+8] += sum9;
1737: y[10*i+9] += sum10;
1738: }
1740: PetscLogFlops(20.0*a->nz);
1741: VecRestoreArrayRead(xx,&x);
1742: VecRestoreArray(yy,&y);
1743: return(0);
1744: }
1746: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1747: {
1748: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1749: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1750: const PetscScalar *x,*v;
1751: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1752: PetscErrorCode ierr;
1753: const PetscInt m = b->AIJ->rmap->n,*idx;
1754: PetscInt n,i;
1757: VecSet(yy,0.0);
1758: VecGetArrayRead(xx,&x);
1759: VecGetArray(yy,&y);
1761: for (i=0; i<m; i++) {
1762: idx = a->j + a->i[i];
1763: v = a->a + a->i[i];
1764: n = a->i[i+1] - a->i[i];
1765: alpha1 = x[10*i];
1766: alpha2 = x[10*i+1];
1767: alpha3 = x[10*i+2];
1768: alpha4 = x[10*i+3];
1769: alpha5 = x[10*i+4];
1770: alpha6 = x[10*i+5];
1771: alpha7 = x[10*i+6];
1772: alpha8 = x[10*i+7];
1773: alpha9 = x[10*i+8];
1774: alpha10 = x[10*i+9];
1775: while (n-->0) {
1776: y[10*(*idx)] += alpha1*(*v);
1777: y[10*(*idx)+1] += alpha2*(*v);
1778: y[10*(*idx)+2] += alpha3*(*v);
1779: y[10*(*idx)+3] += alpha4*(*v);
1780: y[10*(*idx)+4] += alpha5*(*v);
1781: y[10*(*idx)+5] += alpha6*(*v);
1782: y[10*(*idx)+6] += alpha7*(*v);
1783: y[10*(*idx)+7] += alpha8*(*v);
1784: y[10*(*idx)+8] += alpha9*(*v);
1785: y[10*(*idx)+9] += alpha10*(*v);
1786: idx++; v++;
1787: }
1788: }
1789: PetscLogFlops(20.0*a->nz);
1790: VecRestoreArrayRead(xx,&x);
1791: VecRestoreArray(yy,&y);
1792: return(0);
1793: }
1795: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1796: {
1797: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1798: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1799: const PetscScalar *x,*v;
1800: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1801: PetscErrorCode ierr;
1802: const PetscInt m = b->AIJ->rmap->n,*idx;
1803: PetscInt n,i;
1806: if (yy != zz) {VecCopy(yy,zz);}
1807: VecGetArrayRead(xx,&x);
1808: VecGetArray(zz,&y);
1809: for (i=0; i<m; i++) {
1810: idx = a->j + a->i[i];
1811: v = a->a + a->i[i];
1812: n = a->i[i+1] - a->i[i];
1813: alpha1 = x[10*i];
1814: alpha2 = x[10*i+1];
1815: alpha3 = x[10*i+2];
1816: alpha4 = x[10*i+3];
1817: alpha5 = x[10*i+4];
1818: alpha6 = x[10*i+5];
1819: alpha7 = x[10*i+6];
1820: alpha8 = x[10*i+7];
1821: alpha9 = x[10*i+8];
1822: alpha10 = x[10*i+9];
1823: while (n-->0) {
1824: y[10*(*idx)] += alpha1*(*v);
1825: y[10*(*idx)+1] += alpha2*(*v);
1826: y[10*(*idx)+2] += alpha3*(*v);
1827: y[10*(*idx)+3] += alpha4*(*v);
1828: y[10*(*idx)+4] += alpha5*(*v);
1829: y[10*(*idx)+5] += alpha6*(*v);
1830: y[10*(*idx)+6] += alpha7*(*v);
1831: y[10*(*idx)+7] += alpha8*(*v);
1832: y[10*(*idx)+8] += alpha9*(*v);
1833: y[10*(*idx)+9] += alpha10*(*v);
1834: idx++; v++;
1835: }
1836: }
1837: PetscLogFlops(20.0*a->nz);
1838: VecRestoreArrayRead(xx,&x);
1839: VecRestoreArray(zz,&y);
1840: return(0);
1841: }
1844: /*--------------------------------------------------------------------------------------------*/
1845: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1846: {
1847: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1848: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1849: const PetscScalar *x,*v;
1850: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1851: PetscErrorCode ierr;
1852: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1853: PetscInt nonzerorow=0,n,i,jrow,j;
1856: VecGetArrayRead(xx,&x);
1857: VecGetArray(yy,&y);
1858: idx = a->j;
1859: v = a->a;
1860: ii = a->i;
1862: for (i=0; i<m; i++) {
1863: jrow = ii[i];
1864: n = ii[i+1] - jrow;
1865: sum1 = 0.0;
1866: sum2 = 0.0;
1867: sum3 = 0.0;
1868: sum4 = 0.0;
1869: sum5 = 0.0;
1870: sum6 = 0.0;
1871: sum7 = 0.0;
1872: sum8 = 0.0;
1873: sum9 = 0.0;
1874: sum10 = 0.0;
1875: sum11 = 0.0;
1877: nonzerorow += (n>0);
1878: for (j=0; j<n; j++) {
1879: sum1 += v[jrow]*x[11*idx[jrow]];
1880: sum2 += v[jrow]*x[11*idx[jrow]+1];
1881: sum3 += v[jrow]*x[11*idx[jrow]+2];
1882: sum4 += v[jrow]*x[11*idx[jrow]+3];
1883: sum5 += v[jrow]*x[11*idx[jrow]+4];
1884: sum6 += v[jrow]*x[11*idx[jrow]+5];
1885: sum7 += v[jrow]*x[11*idx[jrow]+6];
1886: sum8 += v[jrow]*x[11*idx[jrow]+7];
1887: sum9 += v[jrow]*x[11*idx[jrow]+8];
1888: sum10 += v[jrow]*x[11*idx[jrow]+9];
1889: sum11 += v[jrow]*x[11*idx[jrow]+10];
1890: jrow++;
1891: }
1892: y[11*i] = sum1;
1893: y[11*i+1] = sum2;
1894: y[11*i+2] = sum3;
1895: y[11*i+3] = sum4;
1896: y[11*i+4] = sum5;
1897: y[11*i+5] = sum6;
1898: y[11*i+6] = sum7;
1899: y[11*i+7] = sum8;
1900: y[11*i+8] = sum9;
1901: y[11*i+9] = sum10;
1902: y[11*i+10] = sum11;
1903: }
1905: PetscLogFlops(22*a->nz - 11*nonzerorow);
1906: VecRestoreArrayRead(xx,&x);
1907: VecRestoreArray(yy,&y);
1908: return(0);
1909: }
1911: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1912: {
1913: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1914: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1915: const PetscScalar *x,*v;
1916: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1917: PetscErrorCode ierr;
1918: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1919: PetscInt n,i,jrow,j;
1922: if (yy != zz) {VecCopy(yy,zz);}
1923: VecGetArrayRead(xx,&x);
1924: VecGetArray(zz,&y);
1925: idx = a->j;
1926: v = a->a;
1927: ii = a->i;
1929: for (i=0; i<m; i++) {
1930: jrow = ii[i];
1931: n = ii[i+1] - jrow;
1932: sum1 = 0.0;
1933: sum2 = 0.0;
1934: sum3 = 0.0;
1935: sum4 = 0.0;
1936: sum5 = 0.0;
1937: sum6 = 0.0;
1938: sum7 = 0.0;
1939: sum8 = 0.0;
1940: sum9 = 0.0;
1941: sum10 = 0.0;
1942: sum11 = 0.0;
1943: for (j=0; j<n; j++) {
1944: sum1 += v[jrow]*x[11*idx[jrow]];
1945: sum2 += v[jrow]*x[11*idx[jrow]+1];
1946: sum3 += v[jrow]*x[11*idx[jrow]+2];
1947: sum4 += v[jrow]*x[11*idx[jrow]+3];
1948: sum5 += v[jrow]*x[11*idx[jrow]+4];
1949: sum6 += v[jrow]*x[11*idx[jrow]+5];
1950: sum7 += v[jrow]*x[11*idx[jrow]+6];
1951: sum8 += v[jrow]*x[11*idx[jrow]+7];
1952: sum9 += v[jrow]*x[11*idx[jrow]+8];
1953: sum10 += v[jrow]*x[11*idx[jrow]+9];
1954: sum11 += v[jrow]*x[11*idx[jrow]+10];
1955: jrow++;
1956: }
1957: y[11*i] += sum1;
1958: y[11*i+1] += sum2;
1959: y[11*i+2] += sum3;
1960: y[11*i+3] += sum4;
1961: y[11*i+4] += sum5;
1962: y[11*i+5] += sum6;
1963: y[11*i+6] += sum7;
1964: y[11*i+7] += sum8;
1965: y[11*i+8] += sum9;
1966: y[11*i+9] += sum10;
1967: y[11*i+10] += sum11;
1968: }
1970: PetscLogFlops(22*a->nz);
1971: VecRestoreArrayRead(xx,&x);
1972: VecRestoreArray(yy,&y);
1973: return(0);
1974: }
1976: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1977: {
1978: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1979: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1980: const PetscScalar *x,*v;
1981: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1982: PetscErrorCode ierr;
1983: const PetscInt m = b->AIJ->rmap->n,*idx;
1984: PetscInt n,i;
1987: VecSet(yy,0.0);
1988: VecGetArrayRead(xx,&x);
1989: VecGetArray(yy,&y);
1991: for (i=0; i<m; i++) {
1992: idx = a->j + a->i[i];
1993: v = a->a + a->i[i];
1994: n = a->i[i+1] - a->i[i];
1995: alpha1 = x[11*i];
1996: alpha2 = x[11*i+1];
1997: alpha3 = x[11*i+2];
1998: alpha4 = x[11*i+3];
1999: alpha5 = x[11*i+4];
2000: alpha6 = x[11*i+5];
2001: alpha7 = x[11*i+6];
2002: alpha8 = x[11*i+7];
2003: alpha9 = x[11*i+8];
2004: alpha10 = x[11*i+9];
2005: alpha11 = x[11*i+10];
2006: while (n-->0) {
2007: y[11*(*idx)] += alpha1*(*v);
2008: y[11*(*idx)+1] += alpha2*(*v);
2009: y[11*(*idx)+2] += alpha3*(*v);
2010: y[11*(*idx)+3] += alpha4*(*v);
2011: y[11*(*idx)+4] += alpha5*(*v);
2012: y[11*(*idx)+5] += alpha6*(*v);
2013: y[11*(*idx)+6] += alpha7*(*v);
2014: y[11*(*idx)+7] += alpha8*(*v);
2015: y[11*(*idx)+8] += alpha9*(*v);
2016: y[11*(*idx)+9] += alpha10*(*v);
2017: y[11*(*idx)+10] += alpha11*(*v);
2018: idx++; v++;
2019: }
2020: }
2021: PetscLogFlops(22*a->nz);
2022: VecRestoreArrayRead(xx,&x);
2023: VecRestoreArray(yy,&y);
2024: return(0);
2025: }
2027: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2028: {
2029: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2030: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2031: const PetscScalar *x,*v;
2032: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2033: PetscErrorCode ierr;
2034: const PetscInt m = b->AIJ->rmap->n,*idx;
2035: PetscInt n,i;
2038: if (yy != zz) {VecCopy(yy,zz);}
2039: VecGetArrayRead(xx,&x);
2040: VecGetArray(zz,&y);
2041: for (i=0; i<m; i++) {
2042: idx = a->j + a->i[i];
2043: v = a->a + a->i[i];
2044: n = a->i[i+1] - a->i[i];
2045: alpha1 = x[11*i];
2046: alpha2 = x[11*i+1];
2047: alpha3 = x[11*i+2];
2048: alpha4 = x[11*i+3];
2049: alpha5 = x[11*i+4];
2050: alpha6 = x[11*i+5];
2051: alpha7 = x[11*i+6];
2052: alpha8 = x[11*i+7];
2053: alpha9 = x[11*i+8];
2054: alpha10 = x[11*i+9];
2055: alpha11 = x[11*i+10];
2056: while (n-->0) {
2057: y[11*(*idx)] += alpha1*(*v);
2058: y[11*(*idx)+1] += alpha2*(*v);
2059: y[11*(*idx)+2] += alpha3*(*v);
2060: y[11*(*idx)+3] += alpha4*(*v);
2061: y[11*(*idx)+4] += alpha5*(*v);
2062: y[11*(*idx)+5] += alpha6*(*v);
2063: y[11*(*idx)+6] += alpha7*(*v);
2064: y[11*(*idx)+7] += alpha8*(*v);
2065: y[11*(*idx)+8] += alpha9*(*v);
2066: y[11*(*idx)+9] += alpha10*(*v);
2067: y[11*(*idx)+10] += alpha11*(*v);
2068: idx++; v++;
2069: }
2070: }
2071: PetscLogFlops(22*a->nz);
2072: VecRestoreArrayRead(xx,&x);
2073: VecRestoreArray(zz,&y);
2074: return(0);
2075: }
2078: /*--------------------------------------------------------------------------------------------*/
2079: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2080: {
2081: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2082: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2083: const PetscScalar *x,*v;
2084: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2085: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2086: PetscErrorCode ierr;
2087: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2088: PetscInt nonzerorow=0,n,i,jrow,j;
2091: VecGetArrayRead(xx,&x);
2092: VecGetArray(yy,&y);
2093: idx = a->j;
2094: v = a->a;
2095: ii = a->i;
2097: for (i=0; i<m; i++) {
2098: jrow = ii[i];
2099: n = ii[i+1] - jrow;
2100: sum1 = 0.0;
2101: sum2 = 0.0;
2102: sum3 = 0.0;
2103: sum4 = 0.0;
2104: sum5 = 0.0;
2105: sum6 = 0.0;
2106: sum7 = 0.0;
2107: sum8 = 0.0;
2108: sum9 = 0.0;
2109: sum10 = 0.0;
2110: sum11 = 0.0;
2111: sum12 = 0.0;
2112: sum13 = 0.0;
2113: sum14 = 0.0;
2114: sum15 = 0.0;
2115: sum16 = 0.0;
2117: nonzerorow += (n>0);
2118: for (j=0; j<n; j++) {
2119: sum1 += v[jrow]*x[16*idx[jrow]];
2120: sum2 += v[jrow]*x[16*idx[jrow]+1];
2121: sum3 += v[jrow]*x[16*idx[jrow]+2];
2122: sum4 += v[jrow]*x[16*idx[jrow]+3];
2123: sum5 += v[jrow]*x[16*idx[jrow]+4];
2124: sum6 += v[jrow]*x[16*idx[jrow]+5];
2125: sum7 += v[jrow]*x[16*idx[jrow]+6];
2126: sum8 += v[jrow]*x[16*idx[jrow]+7];
2127: sum9 += v[jrow]*x[16*idx[jrow]+8];
2128: sum10 += v[jrow]*x[16*idx[jrow]+9];
2129: sum11 += v[jrow]*x[16*idx[jrow]+10];
2130: sum12 += v[jrow]*x[16*idx[jrow]+11];
2131: sum13 += v[jrow]*x[16*idx[jrow]+12];
2132: sum14 += v[jrow]*x[16*idx[jrow]+13];
2133: sum15 += v[jrow]*x[16*idx[jrow]+14];
2134: sum16 += v[jrow]*x[16*idx[jrow]+15];
2135: jrow++;
2136: }
2137: y[16*i] = sum1;
2138: y[16*i+1] = sum2;
2139: y[16*i+2] = sum3;
2140: y[16*i+3] = sum4;
2141: y[16*i+4] = sum5;
2142: y[16*i+5] = sum6;
2143: y[16*i+6] = sum7;
2144: y[16*i+7] = sum8;
2145: y[16*i+8] = sum9;
2146: y[16*i+9] = sum10;
2147: y[16*i+10] = sum11;
2148: y[16*i+11] = sum12;
2149: y[16*i+12] = sum13;
2150: y[16*i+13] = sum14;
2151: y[16*i+14] = sum15;
2152: y[16*i+15] = sum16;
2153: }
2155: PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2156: VecRestoreArrayRead(xx,&x);
2157: VecRestoreArray(yy,&y);
2158: return(0);
2159: }
2161: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2162: {
2163: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2164: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2165: const PetscScalar *x,*v;
2166: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2167: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2168: PetscErrorCode ierr;
2169: const PetscInt m = b->AIJ->rmap->n,*idx;
2170: PetscInt n,i;
2173: VecSet(yy,0.0);
2174: VecGetArrayRead(xx,&x);
2175: VecGetArray(yy,&y);
2177: for (i=0; i<m; i++) {
2178: idx = a->j + a->i[i];
2179: v = a->a + a->i[i];
2180: n = a->i[i+1] - a->i[i];
2181: alpha1 = x[16*i];
2182: alpha2 = x[16*i+1];
2183: alpha3 = x[16*i+2];
2184: alpha4 = x[16*i+3];
2185: alpha5 = x[16*i+4];
2186: alpha6 = x[16*i+5];
2187: alpha7 = x[16*i+6];
2188: alpha8 = x[16*i+7];
2189: alpha9 = x[16*i+8];
2190: alpha10 = x[16*i+9];
2191: alpha11 = x[16*i+10];
2192: alpha12 = x[16*i+11];
2193: alpha13 = x[16*i+12];
2194: alpha14 = x[16*i+13];
2195: alpha15 = x[16*i+14];
2196: alpha16 = x[16*i+15];
2197: while (n-->0) {
2198: y[16*(*idx)] += alpha1*(*v);
2199: y[16*(*idx)+1] += alpha2*(*v);
2200: y[16*(*idx)+2] += alpha3*(*v);
2201: y[16*(*idx)+3] += alpha4*(*v);
2202: y[16*(*idx)+4] += alpha5*(*v);
2203: y[16*(*idx)+5] += alpha6*(*v);
2204: y[16*(*idx)+6] += alpha7*(*v);
2205: y[16*(*idx)+7] += alpha8*(*v);
2206: y[16*(*idx)+8] += alpha9*(*v);
2207: y[16*(*idx)+9] += alpha10*(*v);
2208: y[16*(*idx)+10] += alpha11*(*v);
2209: y[16*(*idx)+11] += alpha12*(*v);
2210: y[16*(*idx)+12] += alpha13*(*v);
2211: y[16*(*idx)+13] += alpha14*(*v);
2212: y[16*(*idx)+14] += alpha15*(*v);
2213: y[16*(*idx)+15] += alpha16*(*v);
2214: idx++; v++;
2215: }
2216: }
2217: PetscLogFlops(32.0*a->nz);
2218: VecRestoreArrayRead(xx,&x);
2219: VecRestoreArray(yy,&y);
2220: return(0);
2221: }
2223: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2224: {
2225: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2226: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2227: const PetscScalar *x,*v;
2228: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2229: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2230: PetscErrorCode ierr;
2231: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2232: PetscInt n,i,jrow,j;
2235: if (yy != zz) {VecCopy(yy,zz);}
2236: VecGetArrayRead(xx,&x);
2237: VecGetArray(zz,&y);
2238: idx = a->j;
2239: v = a->a;
2240: ii = a->i;
2242: for (i=0; i<m; i++) {
2243: jrow = ii[i];
2244: n = ii[i+1] - jrow;
2245: sum1 = 0.0;
2246: sum2 = 0.0;
2247: sum3 = 0.0;
2248: sum4 = 0.0;
2249: sum5 = 0.0;
2250: sum6 = 0.0;
2251: sum7 = 0.0;
2252: sum8 = 0.0;
2253: sum9 = 0.0;
2254: sum10 = 0.0;
2255: sum11 = 0.0;
2256: sum12 = 0.0;
2257: sum13 = 0.0;
2258: sum14 = 0.0;
2259: sum15 = 0.0;
2260: sum16 = 0.0;
2261: for (j=0; j<n; j++) {
2262: sum1 += v[jrow]*x[16*idx[jrow]];
2263: sum2 += v[jrow]*x[16*idx[jrow]+1];
2264: sum3 += v[jrow]*x[16*idx[jrow]+2];
2265: sum4 += v[jrow]*x[16*idx[jrow]+3];
2266: sum5 += v[jrow]*x[16*idx[jrow]+4];
2267: sum6 += v[jrow]*x[16*idx[jrow]+5];
2268: sum7 += v[jrow]*x[16*idx[jrow]+6];
2269: sum8 += v[jrow]*x[16*idx[jrow]+7];
2270: sum9 += v[jrow]*x[16*idx[jrow]+8];
2271: sum10 += v[jrow]*x[16*idx[jrow]+9];
2272: sum11 += v[jrow]*x[16*idx[jrow]+10];
2273: sum12 += v[jrow]*x[16*idx[jrow]+11];
2274: sum13 += v[jrow]*x[16*idx[jrow]+12];
2275: sum14 += v[jrow]*x[16*idx[jrow]+13];
2276: sum15 += v[jrow]*x[16*idx[jrow]+14];
2277: sum16 += v[jrow]*x[16*idx[jrow]+15];
2278: jrow++;
2279: }
2280: y[16*i] += sum1;
2281: y[16*i+1] += sum2;
2282: y[16*i+2] += sum3;
2283: y[16*i+3] += sum4;
2284: y[16*i+4] += sum5;
2285: y[16*i+5] += sum6;
2286: y[16*i+6] += sum7;
2287: y[16*i+7] += sum8;
2288: y[16*i+8] += sum9;
2289: y[16*i+9] += sum10;
2290: y[16*i+10] += sum11;
2291: y[16*i+11] += sum12;
2292: y[16*i+12] += sum13;
2293: y[16*i+13] += sum14;
2294: y[16*i+14] += sum15;
2295: y[16*i+15] += sum16;
2296: }
2298: PetscLogFlops(32.0*a->nz);
2299: VecRestoreArrayRead(xx,&x);
2300: VecRestoreArray(zz,&y);
2301: return(0);
2302: }
2304: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2305: {
2306: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2307: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2308: const PetscScalar *x,*v;
2309: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2310: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2311: PetscErrorCode ierr;
2312: const PetscInt m = b->AIJ->rmap->n,*idx;
2313: PetscInt n,i;
2316: if (yy != zz) {VecCopy(yy,zz);}
2317: VecGetArrayRead(xx,&x);
2318: VecGetArray(zz,&y);
2319: for (i=0; i<m; i++) {
2320: idx = a->j + a->i[i];
2321: v = a->a + a->i[i];
2322: n = a->i[i+1] - a->i[i];
2323: alpha1 = x[16*i];
2324: alpha2 = x[16*i+1];
2325: alpha3 = x[16*i+2];
2326: alpha4 = x[16*i+3];
2327: alpha5 = x[16*i+4];
2328: alpha6 = x[16*i+5];
2329: alpha7 = x[16*i+6];
2330: alpha8 = x[16*i+7];
2331: alpha9 = x[16*i+8];
2332: alpha10 = x[16*i+9];
2333: alpha11 = x[16*i+10];
2334: alpha12 = x[16*i+11];
2335: alpha13 = x[16*i+12];
2336: alpha14 = x[16*i+13];
2337: alpha15 = x[16*i+14];
2338: alpha16 = x[16*i+15];
2339: while (n-->0) {
2340: y[16*(*idx)] += alpha1*(*v);
2341: y[16*(*idx)+1] += alpha2*(*v);
2342: y[16*(*idx)+2] += alpha3*(*v);
2343: y[16*(*idx)+3] += alpha4*(*v);
2344: y[16*(*idx)+4] += alpha5*(*v);
2345: y[16*(*idx)+5] += alpha6*(*v);
2346: y[16*(*idx)+6] += alpha7*(*v);
2347: y[16*(*idx)+7] += alpha8*(*v);
2348: y[16*(*idx)+8] += alpha9*(*v);
2349: y[16*(*idx)+9] += alpha10*(*v);
2350: y[16*(*idx)+10] += alpha11*(*v);
2351: y[16*(*idx)+11] += alpha12*(*v);
2352: y[16*(*idx)+12] += alpha13*(*v);
2353: y[16*(*idx)+13] += alpha14*(*v);
2354: y[16*(*idx)+14] += alpha15*(*v);
2355: y[16*(*idx)+15] += alpha16*(*v);
2356: idx++; v++;
2357: }
2358: }
2359: PetscLogFlops(32.0*a->nz);
2360: VecRestoreArrayRead(xx,&x);
2361: VecRestoreArray(zz,&y);
2362: return(0);
2363: }
2365: /*--------------------------------------------------------------------------------------------*/
2366: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2367: {
2368: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2369: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2370: const PetscScalar *x,*v;
2371: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2372: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2373: PetscErrorCode ierr;
2374: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2375: PetscInt nonzerorow=0,n,i,jrow,j;
2378: VecGetArrayRead(xx,&x);
2379: VecGetArray(yy,&y);
2380: idx = a->j;
2381: v = a->a;
2382: ii = a->i;
2384: for (i=0; i<m; i++) {
2385: jrow = ii[i];
2386: n = ii[i+1] - jrow;
2387: sum1 = 0.0;
2388: sum2 = 0.0;
2389: sum3 = 0.0;
2390: sum4 = 0.0;
2391: sum5 = 0.0;
2392: sum6 = 0.0;
2393: sum7 = 0.0;
2394: sum8 = 0.0;
2395: sum9 = 0.0;
2396: sum10 = 0.0;
2397: sum11 = 0.0;
2398: sum12 = 0.0;
2399: sum13 = 0.0;
2400: sum14 = 0.0;
2401: sum15 = 0.0;
2402: sum16 = 0.0;
2403: sum17 = 0.0;
2404: sum18 = 0.0;
2406: nonzerorow += (n>0);
2407: for (j=0; j<n; j++) {
2408: sum1 += v[jrow]*x[18*idx[jrow]];
2409: sum2 += v[jrow]*x[18*idx[jrow]+1];
2410: sum3 += v[jrow]*x[18*idx[jrow]+2];
2411: sum4 += v[jrow]*x[18*idx[jrow]+3];
2412: sum5 += v[jrow]*x[18*idx[jrow]+4];
2413: sum6 += v[jrow]*x[18*idx[jrow]+5];
2414: sum7 += v[jrow]*x[18*idx[jrow]+6];
2415: sum8 += v[jrow]*x[18*idx[jrow]+7];
2416: sum9 += v[jrow]*x[18*idx[jrow]+8];
2417: sum10 += v[jrow]*x[18*idx[jrow]+9];
2418: sum11 += v[jrow]*x[18*idx[jrow]+10];
2419: sum12 += v[jrow]*x[18*idx[jrow]+11];
2420: sum13 += v[jrow]*x[18*idx[jrow]+12];
2421: sum14 += v[jrow]*x[18*idx[jrow]+13];
2422: sum15 += v[jrow]*x[18*idx[jrow]+14];
2423: sum16 += v[jrow]*x[18*idx[jrow]+15];
2424: sum17 += v[jrow]*x[18*idx[jrow]+16];
2425: sum18 += v[jrow]*x[18*idx[jrow]+17];
2426: jrow++;
2427: }
2428: y[18*i] = sum1;
2429: y[18*i+1] = sum2;
2430: y[18*i+2] = sum3;
2431: y[18*i+3] = sum4;
2432: y[18*i+4] = sum5;
2433: y[18*i+5] = sum6;
2434: y[18*i+6] = sum7;
2435: y[18*i+7] = sum8;
2436: y[18*i+8] = sum9;
2437: y[18*i+9] = sum10;
2438: y[18*i+10] = sum11;
2439: y[18*i+11] = sum12;
2440: y[18*i+12] = sum13;
2441: y[18*i+13] = sum14;
2442: y[18*i+14] = sum15;
2443: y[18*i+15] = sum16;
2444: y[18*i+16] = sum17;
2445: y[18*i+17] = sum18;
2446: }
2448: PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2449: VecRestoreArrayRead(xx,&x);
2450: VecRestoreArray(yy,&y);
2451: return(0);
2452: }
2454: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2455: {
2456: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2457: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2458: const PetscScalar *x,*v;
2459: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2460: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2461: PetscErrorCode ierr;
2462: const PetscInt m = b->AIJ->rmap->n,*idx;
2463: PetscInt n,i;
2466: VecSet(yy,0.0);
2467: VecGetArrayRead(xx,&x);
2468: VecGetArray(yy,&y);
2470: for (i=0; i<m; i++) {
2471: idx = a->j + a->i[i];
2472: v = a->a + a->i[i];
2473: n = a->i[i+1] - a->i[i];
2474: alpha1 = x[18*i];
2475: alpha2 = x[18*i+1];
2476: alpha3 = x[18*i+2];
2477: alpha4 = x[18*i+3];
2478: alpha5 = x[18*i+4];
2479: alpha6 = x[18*i+5];
2480: alpha7 = x[18*i+6];
2481: alpha8 = x[18*i+7];
2482: alpha9 = x[18*i+8];
2483: alpha10 = x[18*i+9];
2484: alpha11 = x[18*i+10];
2485: alpha12 = x[18*i+11];
2486: alpha13 = x[18*i+12];
2487: alpha14 = x[18*i+13];
2488: alpha15 = x[18*i+14];
2489: alpha16 = x[18*i+15];
2490: alpha17 = x[18*i+16];
2491: alpha18 = x[18*i+17];
2492: while (n-->0) {
2493: y[18*(*idx)] += alpha1*(*v);
2494: y[18*(*idx)+1] += alpha2*(*v);
2495: y[18*(*idx)+2] += alpha3*(*v);
2496: y[18*(*idx)+3] += alpha4*(*v);
2497: y[18*(*idx)+4] += alpha5*(*v);
2498: y[18*(*idx)+5] += alpha6*(*v);
2499: y[18*(*idx)+6] += alpha7*(*v);
2500: y[18*(*idx)+7] += alpha8*(*v);
2501: y[18*(*idx)+8] += alpha9*(*v);
2502: y[18*(*idx)+9] += alpha10*(*v);
2503: y[18*(*idx)+10] += alpha11*(*v);
2504: y[18*(*idx)+11] += alpha12*(*v);
2505: y[18*(*idx)+12] += alpha13*(*v);
2506: y[18*(*idx)+13] += alpha14*(*v);
2507: y[18*(*idx)+14] += alpha15*(*v);
2508: y[18*(*idx)+15] += alpha16*(*v);
2509: y[18*(*idx)+16] += alpha17*(*v);
2510: y[18*(*idx)+17] += alpha18*(*v);
2511: idx++; v++;
2512: }
2513: }
2514: PetscLogFlops(36.0*a->nz);
2515: VecRestoreArrayRead(xx,&x);
2516: VecRestoreArray(yy,&y);
2517: return(0);
2518: }
2520: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2521: {
2522: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2523: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2524: const PetscScalar *x,*v;
2525: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2526: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2527: PetscErrorCode ierr;
2528: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2529: PetscInt n,i,jrow,j;
2532: if (yy != zz) {VecCopy(yy,zz);}
2533: VecGetArrayRead(xx,&x);
2534: VecGetArray(zz,&y);
2535: idx = a->j;
2536: v = a->a;
2537: ii = a->i;
2539: for (i=0; i<m; i++) {
2540: jrow = ii[i];
2541: n = ii[i+1] - jrow;
2542: sum1 = 0.0;
2543: sum2 = 0.0;
2544: sum3 = 0.0;
2545: sum4 = 0.0;
2546: sum5 = 0.0;
2547: sum6 = 0.0;
2548: sum7 = 0.0;
2549: sum8 = 0.0;
2550: sum9 = 0.0;
2551: sum10 = 0.0;
2552: sum11 = 0.0;
2553: sum12 = 0.0;
2554: sum13 = 0.0;
2555: sum14 = 0.0;
2556: sum15 = 0.0;
2557: sum16 = 0.0;
2558: sum17 = 0.0;
2559: sum18 = 0.0;
2560: for (j=0; j<n; j++) {
2561: sum1 += v[jrow]*x[18*idx[jrow]];
2562: sum2 += v[jrow]*x[18*idx[jrow]+1];
2563: sum3 += v[jrow]*x[18*idx[jrow]+2];
2564: sum4 += v[jrow]*x[18*idx[jrow]+3];
2565: sum5 += v[jrow]*x[18*idx[jrow]+4];
2566: sum6 += v[jrow]*x[18*idx[jrow]+5];
2567: sum7 += v[jrow]*x[18*idx[jrow]+6];
2568: sum8 += v[jrow]*x[18*idx[jrow]+7];
2569: sum9 += v[jrow]*x[18*idx[jrow]+8];
2570: sum10 += v[jrow]*x[18*idx[jrow]+9];
2571: sum11 += v[jrow]*x[18*idx[jrow]+10];
2572: sum12 += v[jrow]*x[18*idx[jrow]+11];
2573: sum13 += v[jrow]*x[18*idx[jrow]+12];
2574: sum14 += v[jrow]*x[18*idx[jrow]+13];
2575: sum15 += v[jrow]*x[18*idx[jrow]+14];
2576: sum16 += v[jrow]*x[18*idx[jrow]+15];
2577: sum17 += v[jrow]*x[18*idx[jrow]+16];
2578: sum18 += v[jrow]*x[18*idx[jrow]+17];
2579: jrow++;
2580: }
2581: y[18*i] += sum1;
2582: y[18*i+1] += sum2;
2583: y[18*i+2] += sum3;
2584: y[18*i+3] += sum4;
2585: y[18*i+4] += sum5;
2586: y[18*i+5] += sum6;
2587: y[18*i+6] += sum7;
2588: y[18*i+7] += sum8;
2589: y[18*i+8] += sum9;
2590: y[18*i+9] += sum10;
2591: y[18*i+10] += sum11;
2592: y[18*i+11] += sum12;
2593: y[18*i+12] += sum13;
2594: y[18*i+13] += sum14;
2595: y[18*i+14] += sum15;
2596: y[18*i+15] += sum16;
2597: y[18*i+16] += sum17;
2598: y[18*i+17] += sum18;
2599: }
2601: PetscLogFlops(36.0*a->nz);
2602: VecRestoreArrayRead(xx,&x);
2603: VecRestoreArray(zz,&y);
2604: return(0);
2605: }
2607: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2608: {
2609: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2610: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2611: const PetscScalar *x,*v;
2612: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2613: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2614: PetscErrorCode ierr;
2615: const PetscInt m = b->AIJ->rmap->n,*idx;
2616: PetscInt n,i;
2619: if (yy != zz) {VecCopy(yy,zz);}
2620: VecGetArrayRead(xx,&x);
2621: VecGetArray(zz,&y);
2622: for (i=0; i<m; i++) {
2623: idx = a->j + a->i[i];
2624: v = a->a + a->i[i];
2625: n = a->i[i+1] - a->i[i];
2626: alpha1 = x[18*i];
2627: alpha2 = x[18*i+1];
2628: alpha3 = x[18*i+2];
2629: alpha4 = x[18*i+3];
2630: alpha5 = x[18*i+4];
2631: alpha6 = x[18*i+5];
2632: alpha7 = x[18*i+6];
2633: alpha8 = x[18*i+7];
2634: alpha9 = x[18*i+8];
2635: alpha10 = x[18*i+9];
2636: alpha11 = x[18*i+10];
2637: alpha12 = x[18*i+11];
2638: alpha13 = x[18*i+12];
2639: alpha14 = x[18*i+13];
2640: alpha15 = x[18*i+14];
2641: alpha16 = x[18*i+15];
2642: alpha17 = x[18*i+16];
2643: alpha18 = x[18*i+17];
2644: while (n-->0) {
2645: y[18*(*idx)] += alpha1*(*v);
2646: y[18*(*idx)+1] += alpha2*(*v);
2647: y[18*(*idx)+2] += alpha3*(*v);
2648: y[18*(*idx)+3] += alpha4*(*v);
2649: y[18*(*idx)+4] += alpha5*(*v);
2650: y[18*(*idx)+5] += alpha6*(*v);
2651: y[18*(*idx)+6] += alpha7*(*v);
2652: y[18*(*idx)+7] += alpha8*(*v);
2653: y[18*(*idx)+8] += alpha9*(*v);
2654: y[18*(*idx)+9] += alpha10*(*v);
2655: y[18*(*idx)+10] += alpha11*(*v);
2656: y[18*(*idx)+11] += alpha12*(*v);
2657: y[18*(*idx)+12] += alpha13*(*v);
2658: y[18*(*idx)+13] += alpha14*(*v);
2659: y[18*(*idx)+14] += alpha15*(*v);
2660: y[18*(*idx)+15] += alpha16*(*v);
2661: y[18*(*idx)+16] += alpha17*(*v);
2662: y[18*(*idx)+17] += alpha18*(*v);
2663: idx++; v++;
2664: }
2665: }
2666: PetscLogFlops(36.0*a->nz);
2667: VecRestoreArrayRead(xx,&x);
2668: VecRestoreArray(zz,&y);
2669: return(0);
2670: }
2672: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2673: {
2674: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2675: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2676: const PetscScalar *x,*v;
2677: PetscScalar *y,*sums;
2678: PetscErrorCode ierr;
2679: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2680: PetscInt n,i,jrow,j,dof = b->dof,k;
2683: VecGetArrayRead(xx,&x);
2684: VecSet(yy,0.0);
2685: VecGetArray(yy,&y);
2686: idx = a->j;
2687: v = a->a;
2688: ii = a->i;
2690: for (i=0; i<m; i++) {
2691: jrow = ii[i];
2692: n = ii[i+1] - jrow;
2693: sums = y + dof*i;
2694: for (j=0; j<n; j++) {
2695: for (k=0; k<dof; k++) {
2696: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2697: }
2698: jrow++;
2699: }
2700: }
2702: PetscLogFlops(2.0*dof*a->nz);
2703: VecRestoreArrayRead(xx,&x);
2704: VecRestoreArray(yy,&y);
2705: return(0);
2706: }
2708: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2709: {
2710: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2711: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2712: const PetscScalar *x,*v;
2713: PetscScalar *y,*sums;
2714: PetscErrorCode ierr;
2715: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2716: PetscInt n,i,jrow,j,dof = b->dof,k;
2719: if (yy != zz) {VecCopy(yy,zz);}
2720: VecGetArrayRead(xx,&x);
2721: VecGetArray(zz,&y);
2722: idx = a->j;
2723: v = a->a;
2724: ii = a->i;
2726: for (i=0; i<m; i++) {
2727: jrow = ii[i];
2728: n = ii[i+1] - jrow;
2729: sums = y + dof*i;
2730: for (j=0; j<n; j++) {
2731: for (k=0; k<dof; k++) {
2732: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2733: }
2734: jrow++;
2735: }
2736: }
2738: PetscLogFlops(2.0*dof*a->nz);
2739: VecRestoreArrayRead(xx,&x);
2740: VecRestoreArray(zz,&y);
2741: return(0);
2742: }
2744: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2745: {
2746: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2747: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2748: const PetscScalar *x,*v,*alpha;
2749: PetscScalar *y;
2750: PetscErrorCode ierr;
2751: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2752: PetscInt n,i,k;
2755: VecGetArrayRead(xx,&x);
2756: VecSet(yy,0.0);
2757: VecGetArray(yy,&y);
2758: for (i=0; i<m; i++) {
2759: idx = a->j + a->i[i];
2760: v = a->a + a->i[i];
2761: n = a->i[i+1] - a->i[i];
2762: alpha = x + dof*i;
2763: while (n-->0) {
2764: for (k=0; k<dof; k++) {
2765: y[dof*(*idx)+k] += alpha[k]*(*v);
2766: }
2767: idx++; v++;
2768: }
2769: }
2770: PetscLogFlops(2.0*dof*a->nz);
2771: VecRestoreArrayRead(xx,&x);
2772: VecRestoreArray(yy,&y);
2773: return(0);
2774: }
2776: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2777: {
2778: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2779: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2780: const PetscScalar *x,*v,*alpha;
2781: PetscScalar *y;
2782: PetscErrorCode ierr;
2783: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2784: PetscInt n,i,k;
2787: if (yy != zz) {VecCopy(yy,zz);}
2788: VecGetArrayRead(xx,&x);
2789: VecGetArray(zz,&y);
2790: for (i=0; i<m; i++) {
2791: idx = a->j + a->i[i];
2792: v = a->a + a->i[i];
2793: n = a->i[i+1] - a->i[i];
2794: alpha = x + dof*i;
2795: while (n-->0) {
2796: for (k=0; k<dof; k++) {
2797: y[dof*(*idx)+k] += alpha[k]*(*v);
2798: }
2799: idx++; v++;
2800: }
2801: }
2802: PetscLogFlops(2.0*dof*a->nz);
2803: VecRestoreArrayRead(xx,&x);
2804: VecRestoreArray(zz,&y);
2805: return(0);
2806: }
2808: /*===================================================================================*/
2809: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2810: {
2811: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2815: /* start the scatter */
2816: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2817: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2818: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2819: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2820: return(0);
2821: }
2823: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2824: {
2825: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2829: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2830: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2831: VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2832: VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2833: return(0);
2834: }
2836: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2837: {
2838: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2842: /* start the scatter */
2843: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2844: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2845: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2846: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2847: return(0);
2848: }
2850: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2851: {
2852: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2856: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2857: VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2858: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2859: VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2860: return(0);
2861: }
2863: /* ----------------------------------------------------------------*/
2864: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2865: {
2866: PetscErrorCode ierr;
2867: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2868: Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data;
2869: Mat P =pp->AIJ;
2870: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2871: PetscInt *pti,*ptj,*ptJ;
2872: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2873: const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2874: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2875: MatScalar *ca;
2876: const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2879: /* Get ij structure of P^T */
2880: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2882: cn = pn*ppdof;
2883: /* Allocate ci array, arrays for fill computation and */
2884: /* free space for accumulating nonzero column info */
2885: PetscMalloc1(cn+1,&ci);
2886: ci[0] = 0;
2888: /* Work arrays for rows of P^T*A */
2889: PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);
2890: PetscMemzero(ptadenserow,an*sizeof(PetscInt));
2891: PetscMemzero(denserow,cn*sizeof(PetscInt));
2893: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2894: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2895: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2896: PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);
2897: current_space = free_space;
2899: /* Determine symbolic info for each row of C: */
2900: for (i=0; i<pn; i++) {
2901: ptnzi = pti[i+1] - pti[i];
2902: ptJ = ptj + pti[i];
2903: for (dof=0; dof<ppdof; dof++) {
2904: ptanzi = 0;
2905: /* Determine symbolic row of PtA: */
2906: for (j=0; j<ptnzi; j++) {
2907: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2908: arow = ptJ[j]*ppdof + dof;
2909: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2910: anzj = ai[arow+1] - ai[arow];
2911: ajj = aj + ai[arow];
2912: for (k=0; k<anzj; k++) {
2913: if (!ptadenserow[ajj[k]]) {
2914: ptadenserow[ajj[k]] = -1;
2915: ptasparserow[ptanzi++] = ajj[k];
2916: }
2917: }
2918: }
2919: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2920: ptaj = ptasparserow;
2921: cnzi = 0;
2922: for (j=0; j<ptanzi; j++) {
2923: /* Get offset within block of P */
2924: pshift = *ptaj%ppdof;
2925: /* Get block row of P */
2926: prow = (*ptaj++)/ppdof; /* integer division */
2927: /* P has same number of nonzeros per row as the compressed form */
2928: pnzj = pi[prow+1] - pi[prow];
2929: pjj = pj + pi[prow];
2930: for (k=0;k<pnzj;k++) {
2931: /* Locations in C are shifted by the offset within the block */
2932: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2933: if (!denserow[pjj[k]*ppdof+pshift]) {
2934: denserow[pjj[k]*ppdof+pshift] = -1;
2935: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
2936: }
2937: }
2938: }
2940: /* sort sparserow */
2941: PetscSortInt(cnzi,sparserow);
2943: /* If free space is not available, make more free space */
2944: /* Double the amount of total space in the list */
2945: if (current_space->local_remaining<cnzi) {
2946: PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);
2947: }
2949: /* Copy data into free space, and zero out denserows */
2950: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
2952: current_space->array += cnzi;
2953: current_space->local_used += cnzi;
2954: current_space->local_remaining -= cnzi;
2956: for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
2957: for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
2959: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2960: /* For now, we will recompute what is needed. */
2961: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2962: }
2963: }
2964: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2965: /* Allocate space for cj, initialize cj, and */
2966: /* destroy list of free space and other temporary array(s) */
2967: PetscMalloc1(ci[cn]+1,&cj);
2968: PetscFreeSpaceContiguous(&free_space,cj);
2969: PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);
2971: /* Allocate space for ca */
2972: PetscCalloc1(ci[cn]+1,&ca);
2974: /* put together the new matrix */
2975: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);
2976: MatSetBlockSize(*C,pp->dof);
2978: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2979: /* Since these are PETSc arrays, change flags to free them as necessary. */
2980: c = (Mat_SeqAIJ*)((*C)->data);
2981: c->free_a = PETSC_TRUE;
2982: c->free_ij = PETSC_TRUE;
2983: c->nonew = 0;
2985: (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2987: /* Clean up. */
2988: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2989: return(0);
2990: }
2992: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2993: {
2994: /* This routine requires testing -- first draft only */
2995: PetscErrorCode ierr;
2996: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2997: Mat P =pp->AIJ;
2998: Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data;
2999: Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data;
3000: Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data;
3001: const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3002: const PetscInt *ci=c->i,*cj=c->j,*cjj;
3003: const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3004: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3005: const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3006: MatScalar *ca=c->a,*caj,*apa;
3009: /* Allocate temporary array for storage of one row of A*P */
3010: PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);
3012: /* Clear old values in C */
3013: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
3015: for (i=0; i<am; i++) {
3016: /* Form sparse row of A*P */
3017: anzi = ai[i+1] - ai[i];
3018: apnzj = 0;
3019: for (j=0; j<anzi; j++) {
3020: /* Get offset within block of P */
3021: pshift = *aj%ppdof;
3022: /* Get block row of P */
3023: prow = *aj++/ppdof; /* integer division */
3024: pnzj = pi[prow+1] - pi[prow];
3025: pjj = pj + pi[prow];
3026: paj = pa + pi[prow];
3027: for (k=0; k<pnzj; k++) {
3028: poffset = pjj[k]*ppdof+pshift;
3029: if (!apjdense[poffset]) {
3030: apjdense[poffset] = -1;
3031: apj[apnzj++] = poffset;
3032: }
3033: apa[poffset] += (*aa)*paj[k];
3034: }
3035: PetscLogFlops(2.0*pnzj);
3036: aa++;
3037: }
3039: /* Sort the j index array for quick sparse axpy. */
3040: /* Note: a array does not need sorting as it is in dense storage locations. */
3041: PetscSortInt(apnzj,apj);
3043: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3044: prow = i/ppdof; /* integer division */
3045: pshift = i%ppdof;
3046: poffset = pi[prow];
3047: pnzi = pi[prow+1] - poffset;
3048: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3049: pJ = pj+poffset;
3050: pA = pa+poffset;
3051: for (j=0; j<pnzi; j++) {
3052: crow = (*pJ)*ppdof+pshift;
3053: cjj = cj + ci[crow];
3054: caj = ca + ci[crow];
3055: pJ++;
3056: /* Perform sparse axpy operation. Note cjj includes apj. */
3057: for (k=0,nextap=0; nextap<apnzj; k++) {
3058: if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3059: }
3060: PetscLogFlops(2.0*apnzj);
3061: pA++;
3062: }
3064: /* Zero the current row info for A*P */
3065: for (j=0; j<apnzj; j++) {
3066: apa[apj[j]] = 0.;
3067: apjdense[apj[j]] = 0;
3068: }
3069: }
3071: /* Assemble the final matrix and clean up */
3072: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3073: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3074: PetscFree3(apa,apj,apjdense);
3075: return(0);
3076: }
3078: PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3079: {
3083: if (scall == MAT_INITIAL_MATRIX) {
3084: PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
3085: MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);
3086: PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
3087: }
3088: PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
3089: MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);
3090: PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
3091: return(0);
3092: }
3094: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3095: {
3099: /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3100: MatConvert(PP,MATMPIAIJ,MAT_INPLACE_MATRIX,&PP);
3101: ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
3102: return(0);
3103: }
3105: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3106: {
3108: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3109: return(0);
3110: }
3112: PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3113: {
3117: if (scall == MAT_INITIAL_MATRIX) {
3118: PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
3119: MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);
3120: PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
3121: }
3122: PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
3123: ((*C)->ops->ptapnumeric)(A,P,*C);
3124: PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
3125: return(0);
3126: }
3128: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3129: {
3130: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
3131: Mat a = b->AIJ,B;
3132: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
3134: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3135: PetscInt *cols;
3136: PetscScalar *vals;
3139: MatGetSize(a,&m,&n);
3140: PetscMalloc1(dof*m,&ilen);
3141: for (i=0; i<m; i++) {
3142: nmax = PetscMax(nmax,aij->ilen[i]);
3143: for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3144: }
3145: MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
3146: PetscFree(ilen);
3147: PetscMalloc1(nmax,&icols);
3148: ii = 0;
3149: for (i=0; i<m; i++) {
3150: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3151: for (j=0; j<dof; j++) {
3152: for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3153: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3154: ii++;
3155: }
3156: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3157: }
3158: PetscFree(icols);
3159: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3160: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3162: if (reuse == MAT_INPLACE_MATRIX) {
3163: MatHeaderReplace(A,&B);
3164: } else {
3165: *newmat = B;
3166: }
3167: return(0);
3168: }
3170: #include <../src/mat/impls/aij/mpi/mpiaij.h>
3172: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3173: {
3174: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
3175: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3176: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3177: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3178: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3179: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3180: PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3181: PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3182: PetscInt rstart,cstart,*garray,ii,k;
3184: PetscScalar *vals,*ovals;
3187: PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);
3188: for (i=0; i<A->rmap->n/dof; i++) {
3189: nmax = PetscMax(nmax,AIJ->ilen[i]);
3190: onmax = PetscMax(onmax,OAIJ->ilen[i]);
3191: for (j=0; j<dof; j++) {
3192: dnz[dof*i+j] = AIJ->ilen[i];
3193: onz[dof*i+j] = OAIJ->ilen[i];
3194: }
3195: }
3196: MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);
3197: MatSetBlockSize(B,dof);
3198: PetscFree2(dnz,onz);
3200: PetscMalloc2(nmax,&icols,onmax,&oicols);
3201: rstart = dof*maij->A->rmap->rstart;
3202: cstart = dof*maij->A->cmap->rstart;
3203: garray = mpiaij->garray;
3205: ii = rstart;
3206: for (i=0; i<A->rmap->n/dof; i++) {
3207: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3208: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3209: for (j=0; j<dof; j++) {
3210: for (k=0; k<ncols; k++) {
3211: icols[k] = cstart + dof*cols[k]+j;
3212: }
3213: for (k=0; k<oncols; k++) {
3214: oicols[k] = dof*garray[ocols[k]]+j;
3215: }
3216: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3217: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3218: ii++;
3219: }
3220: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3221: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3222: }
3223: PetscFree2(icols,oicols);
3225: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3226: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3228: if (reuse == MAT_INPLACE_MATRIX) {
3229: PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3230: ((PetscObject)A)->refct = 1;
3232: MatHeaderReplace(A,&B);
3234: ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3235: } else {
3236: *newmat = B;
3237: }
3238: return(0);
3239: }
3241: PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3242: {
3244: Mat A;
3247: MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3248: MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
3249: MatDestroy(&A);
3250: return(0);
3251: }
3253: /* ---------------------------------------------------------------------------------- */
3254: /*@
3255: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3256: operations for multicomponent problems. It interpolates each component the same
3257: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
3258: and MATMPIAIJ for distributed matrices.
3260: Collective
3262: Input Parameters:
3263: + A - the AIJ matrix describing the action on blocks
3264: - dof - the block size (number of components per node)
3266: Output Parameter:
3267: . maij - the new MAIJ matrix
3269: Operations provided:
3270: + MatMult
3271: . MatMultTranspose
3272: . MatMultAdd
3273: . MatMultTransposeAdd
3274: - MatView
3276: Level: advanced
3278: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3279: @*/
3280: PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3281: {
3283: PetscMPIInt size;
3284: PetscInt n;
3285: Mat B;
3288: PetscObjectReference((PetscObject)A);
3290: if (dof == 1) *maij = A;
3291: else {
3292: MatCreate(PetscObjectComm((PetscObject)A),&B);
3293: MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3294: PetscLayoutSetBlockSize(B->rmap,dof);
3295: PetscLayoutSetBlockSize(B->cmap,dof);
3296: PetscLayoutSetUp(B->rmap);
3297: PetscLayoutSetUp(B->cmap);
3299: B->assembled = PETSC_TRUE;
3301: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3302: if (size == 1) {
3303: Mat_SeqMAIJ *b;
3305: MatSetType(B,MATSEQMAIJ);
3307: B->ops->setup = NULL;
3308: B->ops->destroy = MatDestroy_SeqMAIJ;
3309: B->ops->view = MatView_SeqMAIJ;
3310: b = (Mat_SeqMAIJ*)B->data;
3311: b->dof = dof;
3312: b->AIJ = A;
3314: if (dof == 2) {
3315: B->ops->mult = MatMult_SeqMAIJ_2;
3316: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
3317: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
3318: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3319: } else if (dof == 3) {
3320: B->ops->mult = MatMult_SeqMAIJ_3;
3321: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
3322: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
3323: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3324: } else if (dof == 4) {
3325: B->ops->mult = MatMult_SeqMAIJ_4;
3326: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
3327: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
3328: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3329: } else if (dof == 5) {
3330: B->ops->mult = MatMult_SeqMAIJ_5;
3331: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
3332: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
3333: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3334: } else if (dof == 6) {
3335: B->ops->mult = MatMult_SeqMAIJ_6;
3336: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
3337: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
3338: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3339: } else if (dof == 7) {
3340: B->ops->mult = MatMult_SeqMAIJ_7;
3341: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
3342: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
3343: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3344: } else if (dof == 8) {
3345: B->ops->mult = MatMult_SeqMAIJ_8;
3346: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
3347: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
3348: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3349: } else if (dof == 9) {
3350: B->ops->mult = MatMult_SeqMAIJ_9;
3351: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
3352: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
3353: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3354: } else if (dof == 10) {
3355: B->ops->mult = MatMult_SeqMAIJ_10;
3356: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
3357: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
3358: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3359: } else if (dof == 11) {
3360: B->ops->mult = MatMult_SeqMAIJ_11;
3361: B->ops->multadd = MatMultAdd_SeqMAIJ_11;
3362: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11;
3363: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3364: } else if (dof == 16) {
3365: B->ops->mult = MatMult_SeqMAIJ_16;
3366: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
3367: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
3368: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3369: } else if (dof == 18) {
3370: B->ops->mult = MatMult_SeqMAIJ_18;
3371: B->ops->multadd = MatMultAdd_SeqMAIJ_18;
3372: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18;
3373: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3374: } else {
3375: B->ops->mult = MatMult_SeqMAIJ_N;
3376: B->ops->multadd = MatMultAdd_SeqMAIJ_N;
3377: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N;
3378: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3379: }
3380: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);
3381: PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);
3382: PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijperm_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);
3383: PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijmkl_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);
3384: } else {
3385: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data;
3386: Mat_MPIMAIJ *b;
3387: IS from,to;
3388: Vec gvec;
3390: MatSetType(B,MATMPIMAIJ);
3392: B->ops->setup = NULL;
3393: B->ops->destroy = MatDestroy_MPIMAIJ;
3394: B->ops->view = MatView_MPIMAIJ;
3396: b = (Mat_MPIMAIJ*)B->data;
3397: b->dof = dof;
3398: b->A = A;
3400: MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
3401: MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);
3403: VecGetSize(mpiaij->lvec,&n);
3404: VecCreate(PETSC_COMM_SELF,&b->w);
3405: VecSetSizes(b->w,n*dof,n*dof);
3406: VecSetBlockSize(b->w,dof);
3407: VecSetType(b->w,VECSEQ);
3409: /* create two temporary Index sets for build scatter gather */
3410: ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3411: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
3413: /* create temporary global vector to generate scatter context */
3414: VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);
3416: /* generate the scatter context */
3417: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
3419: ISDestroy(&from);
3420: ISDestroy(&to);
3421: VecDestroy(&gvec);
3423: B->ops->mult = MatMult_MPIMAIJ_dof;
3424: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
3425: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
3426: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3428: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);
3429: PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);
3430: }
3431: B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ;
3432: MatSetUp(B);
3433: *maij = B;
3434: MatViewFromOptions(B,NULL,"-mat_view");
3435: }
3436: return(0);
3437: }