Actual source code: kaij.c
2: /*
3: Defines the basic matrix operations for the KAIJ matrix storage format.
4: This format is used to evaluate matrices of the form:
6: [I \otimes S + A \otimes T]
8: where
9: S is a dense (p \times q) matrix
10: T is a dense (p \times q) matrix
11: A is an AIJ (n \times n) matrix
12: I is the identity matrix
14: The resulting matrix is (np \times nq)
16: We provide:
17: MatMult()
18: MatMultAdd()
19: MatInvertBlockDiagonal()
20: and
21: MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
23: This single directory handles both the sequential and parallel codes
24: */
26: #include <../src/mat/impls/kaij/kaij.h>
27: #include <../src/mat/utils/freespace.h>
28: #include <petsc/private/vecimpl.h>
30: /*@C
31: MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
33: Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
35: Input Parameter:
36: . A - the KAIJ matrix
38: Output Parameter:
39: . B - the AIJ matrix
41: Level: advanced
43: Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
45: .seealso: MatCreateKAIJ()
46: @*/
47: PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B)
48: {
50: PetscBool ismpikaij,isseqkaij;
53: PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);
54: PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);
55: if (ismpikaij) {
56: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
58: *B = b->A;
59: } else if (isseqkaij) {
60: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
62: *B = b->AIJ;
63: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64: return(0);
65: }
67: /*@C
68: MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
70: Not Collective; the entire S is stored and returned independently on all processes.
72: Input Parameter:
73: . A - the KAIJ matrix
75: Output Parameters:
76: + m - the number of rows in S
77: . n - the number of columns in S
78: - S - the S matrix, in form of a scalar array in column-major format
80: Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
82: Level: advanced
84: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85: @*/
86: PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
87: {
88: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
90: if (m) *m = b->p;
91: if (n) *n = b->q;
92: if (S) *S = b->S;
93: return(0);
94: }
96: /*@C
97: MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
99: Not Collective; the entire S is stored and returned independently on all processes.
101: Input Parameter:
102: . A - the KAIJ matrix
104: Output Parameters:
105: + m - the number of rows in S
106: . n - the number of columns in S
107: - S - the S matrix, in form of a scalar array in column-major format
109: Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
111: Level: advanced
113: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114: @*/
115: PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116: {
117: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
119: if (m) *m = b->p;
120: if (n) *n = b->q;
121: if (S) *S = b->S;
122: return(0);
123: }
125: /*@C
126: MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
128: Not collective
130: Input Parameter:
131: . A - the KAIJ matrix
133: Output Parameter:
134: . S - location of pointer to array obtained with MatKAIJGetS()
136: Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137: If NULL is passed, it will not attempt to zero the array pointer.
139: Level: advanced
140: .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141: @*/
142: PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143: {
147: if (S) *S = NULL;
148: PetscObjectStateIncrease((PetscObject)A);
149: return(0);
150: }
152: /*@C
153: MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
155: Not collective
157: Input Parameter:
158: . A - the KAIJ matrix
160: Output Parameter:
161: . S - location of pointer to array obtained with MatKAIJGetS()
163: Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
164: If NULL is passed, it will not attempt to zero the array pointer.
166: Level: advanced
167: .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
168: @*/
169: PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
170: {
172: if (S) *S = NULL;
173: return(0);
174: }
176: /*@C
177: MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
179: Not Collective; the entire T is stored and returned independently on all processes
181: Input Parameter:
182: . A - the KAIJ matrix
184: Output Parameters:
185: + m - the number of rows in T
186: . n - the number of columns in T
187: - T - the T matrix, in form of a scalar array in column-major format
189: Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
191: Level: advanced
193: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
194: @*/
195: PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
196: {
197: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
199: if (m) *m = b->p;
200: if (n) *n = b->q;
201: if (T) *T = b->T;
202: return(0);
203: }
205: /*@C
206: MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
208: Not Collective; the entire T is stored and returned independently on all processes
210: Input Parameter:
211: . A - the KAIJ matrix
213: Output Parameters:
214: + m - the number of rows in T
215: . n - the number of columns in T
216: - T - the T matrix, in form of a scalar array in column-major format
218: Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
220: Level: advanced
222: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
223: @*/
224: PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
225: {
226: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
228: if (m) *m = b->p;
229: if (n) *n = b->q;
230: if (T) *T = b->T;
231: return(0);
232: }
234: /*@C
235: MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
237: Not collective
239: Input Parameter:
240: . A - the KAIJ matrix
242: Output Parameter:
243: . T - location of pointer to array obtained with MatKAIJGetS()
245: Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
246: If NULL is passed, it will not attempt to zero the array pointer.
248: Level: advanced
249: .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
250: @*/
251: PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
252: {
256: if (T) *T = NULL;
257: PetscObjectStateIncrease((PetscObject)A);
258: return(0);
259: }
261: /*@C
262: MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
264: Not collective
266: Input Parameter:
267: . A - the KAIJ matrix
269: Output Parameter:
270: . T - location of pointer to array obtained with MatKAIJGetS()
272: Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
273: If NULL is passed, it will not attempt to zero the array pointer.
275: Level: advanced
276: .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
277: @*/
278: PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
279: {
281: if (T) *T = NULL;
282: return(0);
283: }
285: /*@
286: MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
288: Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
290: Input Parameters:
291: + A - the KAIJ matrix
292: - B - the AIJ matrix
294: Notes:
295: This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
296: Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
298: Level: advanced
300: .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
301: @*/
302: PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
303: {
305: PetscMPIInt size;
308: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
309: if (size == 1) {
310: Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
311: a->AIJ = B;
312: } else {
313: Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
314: a->A = B;
315: }
316: PetscObjectReference((PetscObject)B);
317: return(0);
318: }
320: /*@C
321: MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
323: Logically Collective; the entire S is stored independently on all processes.
325: Input Parameters:
326: + A - the KAIJ matrix
327: . p - the number of rows in S
328: . q - the number of columns in S
329: - S - the S matrix, in form of a scalar array in column-major format
331: Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
332: The S matrix is copied, so the user can destroy this array.
334: Level: Advanced
336: .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
337: @*/
338: PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
339: {
341: Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
344: PetscFree(a->S);
345: if (S) {
346: PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);
347: PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));
348: } else a->S = NULL;
350: a->p = p;
351: a->q = q;
352: return(0);
353: }
355: /*@C
356: MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
358: Logically Collective.
360: Input Parameter:
361: . A - the KAIJ matrix
363: Output Parameter:
364: . identity - the Boolean value
366: Level: Advanced
368: .seealso: MatKAIJGetS(), MatKAIJGetT()
369: @*/
370: PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
371: {
372: Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
373: PetscInt i,j;
376: if (a->p != a->q) {
377: *identity = PETSC_FALSE;
378: return(0);
379: } else *identity = PETSC_TRUE;
380: if (!a->isTI || a->S) {
381: for (i=0; i<a->p && *identity; i++) {
382: for (j=0; j<a->p && *identity; j++) {
383: if (i != j) {
384: if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
385: if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
386: } else {
387: if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
388: if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
389: }
390: }
391: }
392: }
393: return(0);
394: }
396: /*@C
397: MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
399: Logically Collective; the entire T is stored independently on all processes.
401: Input Parameters:
402: + A - the KAIJ matrix
403: . p - the number of rows in S
404: . q - the number of columns in S
405: - T - the T matrix, in form of a scalar array in column-major format
407: Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
408: The T matrix is copied, so the user can destroy this array.
410: Level: Advanced
412: .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
413: @*/
414: PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
415: {
417: PetscInt i,j;
418: Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
419: PetscBool isTI = PETSC_FALSE;
422: /* check if T is an identity matrix */
423: if (T && (p == q)) {
424: isTI = PETSC_TRUE;
425: for (i=0; i<p; i++) {
426: for (j=0; j<q; j++) {
427: if (i == j) {
428: /* diagonal term must be 1 */
429: if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
430: } else {
431: /* off-diagonal term must be 0 */
432: if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
433: }
434: }
435: }
436: }
437: a->isTI = isTI;
439: PetscFree(a->T);
440: if (T && (!isTI)) {
441: PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);
442: PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));
443: } else a->T = NULL;
445: a->p = p;
446: a->q = q;
447: return(0);
448: }
450: PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
451: {
453: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
456: MatDestroy(&b->AIJ);
457: PetscFree(b->S);
458: PetscFree(b->T);
459: PetscFree(b->ibdiag);
460: PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);
461: PetscFree(A->data);
462: return(0);
463: }
465: PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
466: {
467: PetscErrorCode ierr;
468: Mat_MPIKAIJ *a;
469: Mat_MPIAIJ *mpiaij;
470: PetscScalar *T;
471: PetscInt i,j;
472: PetscObjectState state;
475: a = (Mat_MPIKAIJ*)A->data;
476: mpiaij = (Mat_MPIAIJ*)a->A->data;
478: PetscObjectStateGet((PetscObject)a->A,&state);
479: if (state == a->state) {
480: /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
481: return(0);
482: } else {
483: MatDestroy(&a->AIJ);
484: MatDestroy(&a->OAIJ);
485: if (a->isTI) {
486: /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
487: * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
488: * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
489: * to pass in. */
490: PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);
491: for (i=0; i<a->p; i++) {
492: for (j=0; j<a->q; j++) {
493: if (i==j) T[i+j*a->p] = 1.0;
494: else T[i+j*a->p] = 0.0;
495: }
496: }
497: } else T = a->T;
498: MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);
499: MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);
500: if (a->isTI) {
501: PetscFree(T);
502: }
503: a->state = state;
504: }
506: return(0);
507: }
509: PetscErrorCode MatSetUp_KAIJ(Mat A)
510: {
512: PetscInt n;
513: PetscMPIInt size;
514: Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data;
517: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
518: if (size == 1) {
519: MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);
520: PetscLayoutSetBlockSize(A->rmap,seqkaij->p);
521: PetscLayoutSetBlockSize(A->cmap,seqkaij->q);
522: PetscLayoutSetUp(A->rmap);
523: PetscLayoutSetUp(A->cmap);
524: } else {
525: Mat_MPIKAIJ *a;
526: Mat_MPIAIJ *mpiaij;
527: IS from,to;
528: Vec gvec;
530: a = (Mat_MPIKAIJ*)A->data;
531: mpiaij = (Mat_MPIAIJ*)a->A->data;
532: MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);
533: PetscLayoutSetBlockSize(A->rmap,seqkaij->p);
534: PetscLayoutSetBlockSize(A->cmap,seqkaij->q);
535: PetscLayoutSetUp(A->rmap);
536: PetscLayoutSetUp(A->cmap);
538: MatKAIJ_build_AIJ_OAIJ(A);
540: VecGetSize(mpiaij->lvec,&n);
541: VecCreate(PETSC_COMM_SELF,&a->w);
542: VecSetSizes(a->w,n*a->q,n*a->q);
543: VecSetBlockSize(a->w,a->q);
544: VecSetType(a->w,VECSEQ);
546: /* create two temporary Index sets for build scatter gather */
547: ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
548: ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);
550: /* create temporary global vector to generate scatter context */
551: VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);
553: /* generate the scatter context */
554: VecScatterCreate(gvec,from,a->w,to,&a->ctx);
556: ISDestroy(&from);
557: ISDestroy(&to);
558: VecDestroy(&gvec);
559: }
561: A->assembled = PETSC_TRUE;
562: return(0);
563: }
565: PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
566: {
567: PetscViewerFormat format;
568: Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
569: Mat B;
570: PetscInt i;
571: PetscErrorCode ierr;
572: PetscBool ismpikaij;
575: PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);
576: PetscViewerGetFormat(viewer,&format);
577: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
578: PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);
580: /* Print appropriate details for S. */
581: if (!a->S) {
582: PetscViewerASCIIPrintf(viewer,"S is NULL\n");
583: } else if (format == PETSC_VIEWER_ASCII_IMPL) {
584: PetscViewerASCIIPrintf(viewer,"Entries of S are ");
585: for (i=0; i<(a->p * a->q); i++) {
586: #if defined(PETSC_USE_COMPLEX)
587: PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));
588: #else
589: PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));
590: #endif
591: }
592: PetscViewerASCIIPrintf(viewer,"\n");
593: }
595: /* Print appropriate details for T. */
596: if (a->isTI) {
597: PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");
598: } else if (!a->T) {
599: PetscViewerASCIIPrintf(viewer,"T is NULL\n");
600: } else if (format == PETSC_VIEWER_ASCII_IMPL) {
601: PetscViewerASCIIPrintf(viewer,"Entries of T are ");
602: for (i=0; i<(a->p * a->q); i++) {
603: #if defined(PETSC_USE_COMPLEX)
604: PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));
605: #else
606: PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));
607: #endif
608: }
609: PetscViewerASCIIPrintf(viewer,"\n");
610: }
612: /* Now print details for the AIJ matrix, using the AIJ viewer. */
613: PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");
614: if (ismpikaij) {
615: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
616: MatView(b->A,viewer);
617: } else {
618: MatView(a->AIJ,viewer);
619: }
621: } else {
622: /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
623: if (ismpikaij) {
624: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
625: } else {
626: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
627: }
628: MatView(B,viewer);
629: MatDestroy(&B);
630: }
631: return(0);
632: }
634: PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
635: {
637: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
640: MatDestroy(&b->AIJ);
641: MatDestroy(&b->OAIJ);
642: MatDestroy(&b->A);
643: VecScatterDestroy(&b->ctx);
644: VecDestroy(&b->w);
645: PetscFree(b->S);
646: PetscFree(b->T);
647: PetscFree(b->ibdiag);
648: PetscFree(A->data);
649: return(0);
650: }
652: /* --------------------------------------------------------------------------------------*/
654: /* zz = yy + Axx */
655: PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
656: {
657: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
658: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
659: const PetscScalar *s = b->S, *t = b->T;
660: const PetscScalar *x,*v,*bx;
661: PetscScalar *y,*sums;
662: PetscErrorCode ierr;
663: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
664: PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k;
667: if (!yy) {
668: VecSet(zz,0.0);
669: } else {
670: VecCopy(yy,zz);
671: }
672: if ((!s) && (!t) && (!b->isTI)) return(0);
674: VecGetArrayRead(xx,&x);
675: VecGetArray(zz,&y);
676: idx = a->j;
677: v = a->a;
678: ii = a->i;
680: if (b->isTI) {
681: for (i=0; i<m; i++) {
682: jrow = ii[i];
683: n = ii[i+1] - jrow;
684: sums = y + p*i;
685: for (j=0; j<n; j++) {
686: for (k=0; k<p; k++) {
687: sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
688: }
689: }
690: }
691: PetscLogFlops(3.0*(a->nz)*p);
692: } else if (t) {
693: for (i=0; i<m; i++) {
694: jrow = ii[i];
695: n = ii[i+1] - jrow;
696: sums = y + p*i;
697: for (j=0; j<n; j++) {
698: for (k=0; k<p; k++) {
699: for (l=0; l<q; l++) {
700: sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
701: }
702: }
703: }
704: }
705: /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
706: * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
707: * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
708: * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
709: PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz);
710: }
711: if (s) {
712: for (i=0; i<m; i++) {
713: sums = y + p*i;
714: bx = x + q*i;
715: if (i < b->AIJ->cmap->n) {
716: for (j=0; j<q; j++) {
717: for (k=0; k<p; k++) {
718: sums[k] += s[k+j*p]*bx[j];
719: }
720: }
721: }
722: }
723: PetscLogFlops(2.0*m*p*q);
724: }
726: VecRestoreArrayRead(xx,&x);
727: VecRestoreArray(zz,&y);
728: return(0);
729: }
731: PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
732: {
735: MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);
736: return(0);
737: }
739: #include <petsc/private/kernels/blockinvert.h>
741: PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
742: {
743: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
744: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
745: const PetscScalar *S = b->S;
746: const PetscScalar *T = b->T;
747: const PetscScalar *v = a->a;
748: const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
749: PetscErrorCode ierr;
750: PetscInt i,j,*v_pivots,dof,dof2;
751: PetscScalar *diag,aval,*v_work;
754: if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
755: if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
757: dof = p;
758: dof2 = dof*dof;
760: if (b->ibdiagvalid) {
761: if (values) *values = b->ibdiag;
762: return(0);
763: }
764: if (!b->ibdiag) {
765: PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);
766: PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));
767: }
768: if (values) *values = b->ibdiag;
769: diag = b->ibdiag;
771: PetscMalloc2(dof,&v_work,dof,&v_pivots);
772: for (i=0; i<m; i++) {
773: if (S) {
774: PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));
775: } else {
776: PetscMemzero(diag,dof2*sizeof(PetscScalar));
777: }
778: if (b->isTI) {
779: aval = 0;
780: for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
781: for (j=0; j<dof; j++) diag[j+dof*j] += aval;
782: } else if (T) {
783: aval = 0;
784: for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
785: for (j=0; j<dof2; j++) diag[j] += aval*T[j];
786: }
787: PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);
788: diag += dof2;
789: }
790: PetscFree2(v_work,v_pivots);
792: b->ibdiagvalid = PETSC_TRUE;
793: return(0);
794: }
796: static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
797: {
798: Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
801: *B = kaij->AIJ;
802: return(0);
803: }
805: PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
806: {
807: PetscErrorCode ierr;
808: Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data;
809: Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data;
810: const PetscScalar *aa = a->a, *T = kaij->T, *v;
811: const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
812: const PetscScalar *b, *xb, *idiag;
813: PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt;
814: PetscInt i, j, k, i2, bs, bs2, nz;
817: its = its*lits;
818: if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
819: if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
820: if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
821: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
822: if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
823: else {bs = p; bs2 = bs*bs; }
825: if (!m) return(0);
827: if (!kaij->ibdiagvalid) { MatInvertBlockDiagonal_SeqKAIJ(A,NULL); }
828: idiag = kaij->ibdiag;
829: diag = a->diag;
831: if (!kaij->sor.setup) {
832: PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);
833: kaij->sor.setup = PETSC_TRUE;
834: }
835: y = kaij->sor.y;
836: w = kaij->sor.w;
837: work = kaij->sor.work;
838: t = kaij->sor.t;
839: arr = kaij->sor.arr;
841: VecGetArray(xx,&x);
842: VecGetArrayRead(bb,&b);
844: if (flag & SOR_ZERO_INITIAL_GUESS) {
845: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
846: PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */
847: PetscMemcpy(t,b,bs*sizeof(PetscScalar));
848: i2 = bs;
849: idiag += bs2;
850: for (i=1; i<m; i++) {
851: v = aa + ai[i];
852: vi = aj + ai[i];
853: nz = diag[i] - ai[i];
855: if (T) { /* b - T (Arow * x) */
856: PetscMemzero(w,bs*sizeof(PetscScalar));
857: for (j=0; j<nz; j++) {
858: for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
859: }
860: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
861: for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
862: } else if (kaij->isTI) {
863: PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));
864: for (j=0; j<nz; j++) {
865: for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
866: }
867: } else {
868: PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));
869: }
871: PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
872: for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
874: idiag += bs2;
875: i2 += bs;
876: }
877: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
878: PetscLogFlops(1.0*bs2*a->nz);
879: xb = t;
880: } else xb = b;
881: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
882: idiag = kaij->ibdiag+bs2*(m-1);
883: i2 = bs * (m-1);
884: PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
885: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
886: i2 -= bs;
887: idiag -= bs2;
888: for (i=m-2; i>=0; i--) {
889: v = aa + diag[i] + 1 ;
890: vi = aj + diag[i] + 1;
891: nz = ai[i+1] - diag[i] - 1;
893: if (T) { /* FIXME: This branch untested */
894: PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
895: /* copy all rows of x that are needed into contiguous space */
896: workt = work;
897: for (j=0; j<nz; j++) {
898: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
899: workt += bs;
900: }
901: arrt = arr;
902: for (j=0; j<nz; j++) {
903: PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
904: for (k=0; k<bs2; k++) arrt[k] *= v[j];
905: arrt += bs2;
906: }
907: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
908: } else if (kaij->isTI) {
909: PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));
910: for (j=0; j<nz; j++) {
911: for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
912: }
913: }
915: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
916: for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
918: idiag -= bs2;
919: i2 -= bs;
920: }
921: PetscLogFlops(1.0*bs2*(a->nz));
922: }
923: its--;
924: }
925: while (its--) { /* FIXME: This branch not updated */
926: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
927: i2 = 0;
928: idiag = kaij->ibdiag;
929: for (i=0; i<m; i++) {
930: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
932: v = aa + ai[i];
933: vi = aj + ai[i];
934: nz = diag[i] - ai[i];
935: workt = work;
936: for (j=0; j<nz; j++) {
937: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
938: workt += bs;
939: }
940: arrt = arr;
941: if (T) {
942: for (j=0; j<nz; j++) {
943: PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
944: for (k=0; k<bs2; k++) arrt[k] *= v[j];
945: arrt += bs2;
946: }
947: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
948: } else if (kaij->isTI) {
949: for (j=0; j<nz; j++) {
950: PetscMemzero(arrt,bs2*sizeof(PetscScalar));
951: for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
952: arrt += bs2;
953: }
954: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
955: }
956: PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));
958: v = aa + diag[i] + 1;
959: vi = aj + diag[i] + 1;
960: nz = ai[i+1] - diag[i] - 1;
961: workt = work;
962: for (j=0; j<nz; j++) {
963: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
964: workt += bs;
965: }
966: arrt = arr;
967: if (T) {
968: for (j=0; j<nz; j++) {
969: PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
970: for (k=0; k<bs2; k++) arrt[k] *= v[j];
971: arrt += bs2;
972: }
973: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
974: } else if (kaij->isTI) {
975: for (j=0; j<nz; j++) {
976: PetscMemzero(arrt,bs2*sizeof(PetscScalar));
977: for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
978: arrt += bs2;
979: }
980: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
981: }
983: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
984: for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
986: idiag += bs2;
987: i2 += bs;
988: }
989: xb = t;
990: }
991: else xb = b;
992: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
993: idiag = kaij->ibdiag+bs2*(m-1);
994: i2 = bs * (m-1);
995: if (xb == b) {
996: for (i=m-1; i>=0; i--) {
997: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
999: v = aa + ai[i];
1000: vi = aj + ai[i];
1001: nz = diag[i] - ai[i];
1002: workt = work;
1003: for (j=0; j<nz; j++) {
1004: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1005: workt += bs;
1006: }
1007: arrt = arr;
1008: if (T) {
1009: for (j=0; j<nz; j++) {
1010: PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
1011: for (k=0; k<bs2; k++) arrt[k] *= v[j];
1012: arrt += bs2;
1013: }
1014: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1015: } else if (kaij->isTI) {
1016: for (j=0; j<nz; j++) {
1017: PetscMemzero(arrt,bs2*sizeof(PetscScalar));
1018: for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1019: arrt += bs2;
1020: }
1021: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1022: }
1024: v = aa + diag[i] + 1;
1025: vi = aj + diag[i] + 1;
1026: nz = ai[i+1] - diag[i] - 1;
1027: workt = work;
1028: for (j=0; j<nz; j++) {
1029: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1030: workt += bs;
1031: }
1032: arrt = arr;
1033: if (T) {
1034: for (j=0; j<nz; j++) {
1035: PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
1036: for (k=0; k<bs2; k++) arrt[k] *= v[j];
1037: arrt += bs2;
1038: }
1039: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1040: } else if (kaij->isTI) {
1041: for (j=0; j<nz; j++) {
1042: PetscMemzero(arrt,bs2*sizeof(PetscScalar));
1043: for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1044: arrt += bs2;
1045: }
1046: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1047: }
1049: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1050: for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1051: }
1052: } else {
1053: for (i=m-1; i>=0; i--) {
1054: PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
1055: v = aa + diag[i] + 1;
1056: vi = aj + diag[i] + 1;
1057: nz = ai[i+1] - diag[i] - 1;
1058: workt = work;
1059: for (j=0; j<nz; j++) {
1060: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1061: workt += bs;
1062: }
1063: arrt = arr;
1064: if (T) {
1065: for (j=0; j<nz; j++) {
1066: PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
1067: for (k=0; k<bs2; k++) arrt[k] *= v[j];
1068: arrt += bs2;
1069: }
1070: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1071: } else if (kaij->isTI) {
1072: for (j=0; j<nz; j++) {
1073: PetscMemzero(arrt,bs2*sizeof(PetscScalar));
1074: for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1075: arrt += bs2;
1076: }
1077: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1078: }
1079: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1080: for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1081: }
1082: }
1083: PetscLogFlops(1.0*bs2*(a->nz));
1084: }
1085: }
1087: VecRestoreArray(xx,&x);
1088: VecRestoreArrayRead(bb,&b);
1089: return(0);
1090: }
1092: /*===================================================================================*/
1094: PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1095: {
1096: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
1100: if (!yy) {
1101: VecSet(zz,0.0);
1102: } else {
1103: VecCopy(yy,zz);
1104: }
1105: MatKAIJ_build_AIJ_OAIJ(A); /* Ensure b->AIJ and b->OAIJ are up to date. */
1106: /* start the scatter */
1107: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
1108: (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);
1109: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
1110: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
1111: return(0);
1112: }
1114: PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1115: {
1118: MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);
1119: return(0);
1120: }
1122: PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1123: {
1124: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
1125: PetscErrorCode ierr;
1128: MatKAIJ_build_AIJ_OAIJ(A); /* Ensure b->AIJ is up to date. */
1129: (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);
1130: return(0);
1131: }
1133: /* ----------------------------------------------------------------*/
1135: PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1136: {
1137: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data;
1138: PetscErrorCode diag = PETSC_FALSE;
1139: PetscErrorCode ierr;
1140: PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1141: PetscScalar *vaij,*v,*S=b->S,*T=b->T;
1144: if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1145: b->getrowactive = PETSC_TRUE;
1146: if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
1148: if ((!S) && (!T) && (!b->isTI)) {
1149: if (ncols) *ncols = 0;
1150: if (cols) *cols = NULL;
1151: if (values) *values = NULL;
1152: return(0);
1153: }
1155: if (T || b->isTI) {
1156: MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);
1157: c = nzaij;
1158: for (i=0; i<nzaij; i++) {
1159: /* check if this row contains a diagonal entry */
1160: if (colsaij[i] == r) {
1161: diag = PETSC_TRUE;
1162: c = i;
1163: }
1164: }
1165: } else nzaij = c = 0;
1167: /* calculate size of row */
1168: nz = 0;
1169: if (S) nz += q;
1170: if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1172: if (cols || values) {
1173: PetscMalloc2(nz,&idx,nz,&v);
1174: for (i=0; i<q; i++) {
1175: /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1176: v[i] = 0.0;
1177: }
1178: if (b->isTI) {
1179: for (i=0; i<nzaij; i++) {
1180: for (j=0; j<q; j++) {
1181: idx[i*q+j] = colsaij[i]*q+j;
1182: v[i*q+j] = (j==s ? vaij[i] : 0);
1183: }
1184: }
1185: } else if (T) {
1186: for (i=0; i<nzaij; i++) {
1187: for (j=0; j<q; j++) {
1188: idx[i*q+j] = colsaij[i]*q+j;
1189: v[i*q+j] = vaij[i]*T[s+j*p];
1190: }
1191: }
1192: }
1193: if (S) {
1194: for (j=0; j<q; j++) {
1195: idx[c*q+j] = r*q+j;
1196: v[c*q+j] += S[s+j*p];
1197: }
1198: }
1199: }
1201: if (ncols) *ncols = nz;
1202: if (cols) *cols = idx;
1203: if (values) *values = v;
1204: return(0);
1205: }
1207: PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1208: {
1212: if (nz) *nz = 0;
1213: PetscFree2(*idx,*v);
1214: ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1215: return(0);
1216: }
1218: PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1219: {
1220: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data;
1221: Mat AIJ = b->A;
1222: PetscBool diag = PETSC_FALSE;
1223: Mat MatAIJ,MatOAIJ;
1224: PetscErrorCode ierr;
1225: const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1226: PetscInt nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1227: PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T;
1230: MatKAIJ_build_AIJ_OAIJ(A); /* Ensure b->AIJ and b->OAIJ are up to date. */
1231: MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1232: MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1233: if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1234: b->getrowactive = PETSC_TRUE;
1235: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1236: lrow = row - rstart;
1238: if ((!S) && (!T) && (!b->isTI)) {
1239: if (ncols) *ncols = 0;
1240: if (cols) *cols = NULL;
1241: if (values) *values = NULL;
1242: return(0);
1243: }
1245: r = lrow/p;
1246: s = lrow%p;
1248: if (T || b->isTI) {
1249: MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
1250: MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);
1251: MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);
1253: c = ncolsaij + ncolsoaij;
1254: for (i=0; i<ncolsaij; i++) {
1255: /* check if this row contains a diagonal entry */
1256: if (colsaij[i] == r) {
1257: diag = PETSC_TRUE;
1258: c = i;
1259: }
1260: }
1261: } else c = 0;
1263: /* calculate size of row */
1264: nz = 0;
1265: if (S) nz += q;
1266: if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1268: if (cols || values) {
1269: PetscMalloc2(nz,&idx,nz,&v);
1270: for (i=0; i<q; i++) {
1271: /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1272: v[i] = 0.0;
1273: }
1274: if (b->isTI) {
1275: for (i=0; i<ncolsaij; i++) {
1276: for (j=0; j<q; j++) {
1277: idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1278: v[i*q+j] = (j==s ? vals[i] : 0.0);
1279: }
1280: }
1281: for (i=0; i<ncolsoaij; i++) {
1282: for (j=0; j<q; j++) {
1283: idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1284: v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0);
1285: }
1286: }
1287: } else if (T) {
1288: for (i=0; i<ncolsaij; i++) {
1289: for (j=0; j<q; j++) {
1290: idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1291: v[i*q+j] = vals[i]*T[s+j*p];
1292: }
1293: }
1294: for (i=0; i<ncolsoaij; i++) {
1295: for (j=0; j<q; j++) {
1296: idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1297: v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p];
1298: }
1299: }
1300: }
1301: if (S) {
1302: for (j=0; j<q; j++) {
1303: idx[c*q+j] = (r+rstart/p)*q+j;
1304: v[c*q+j] += S[s+j*p];
1305: }
1306: }
1307: }
1309: if (ncols) *ncols = nz;
1310: if (cols) *cols = idx;
1311: if (values) *values = v;
1312: return(0);
1313: }
1315: PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1316: {
1319: PetscFree2(*idx,*v);
1320: ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1321: return(0);
1322: }
1324: PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1325: {
1327: Mat A;
1330: MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
1331: MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
1332: MatDestroy(&A);
1333: return(0);
1334: }
1336: /* ---------------------------------------------------------------------------------- */
1337: /*@C
1338: MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1340: [I \otimes S + A \otimes T]
1342: where
1343: S is a dense (p \times q) matrix
1344: T is a dense (p \times q) matrix
1345: A is an AIJ (n \times n) matrix
1346: I is the identity matrix
1347: The resulting matrix is (np \times nq)
1349: S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1351: Collective
1353: Input Parameters:
1354: + A - the AIJ matrix
1355: . p - number of rows in S and T
1356: . q - number of columns in S and T
1357: . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1358: - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1360: Output Parameter:
1361: . kaij - the new KAIJ matrix
1363: Notes:
1364: This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1365: Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1367: Developer Notes:
1368: In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1369: of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
1370: rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
1371: routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1373: Level: advanced
1375: .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1376: @*/
1377: PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1378: {
1380: PetscMPIInt size;
1383: MatCreate(PetscObjectComm((PetscObject)A),kaij);
1384: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1385: if (size == 1) {
1386: MatSetType(*kaij,MATSEQKAIJ);
1387: } else {
1388: MatSetType(*kaij,MATMPIKAIJ);
1389: }
1390: MatKAIJSetAIJ(*kaij,A);
1391: MatKAIJSetS(*kaij,p,q,S);
1392: MatKAIJSetT(*kaij,p,q,T);
1393: MatSetUp(*kaij);
1394: return(0);
1395: }
1397: /*MC
1398: MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1399: [I \otimes S + A \otimes T],
1400: where
1401: S is a dense (p \times q) matrix,
1402: T is a dense (p \times q) matrix,
1403: A is an AIJ (n \times n) matrix,
1404: and I is the identity matrix.
1405: The resulting matrix is (np \times nq).
1407: S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1409: Notes:
1410: A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1411: where x and b are column vectors containing the row-major representations of X and B.
1413: Level: advanced
1415: .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1416: M*/
1418: PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1419: {
1421: Mat_MPIKAIJ *b;
1422: PetscMPIInt size;
1425: PetscNewLog(A,&b);
1426: A->data = (void*)b;
1428: PetscMemzero(A->ops,sizeof(struct _MatOps));
1430: A->ops->setup = MatSetUp_KAIJ;
1432: b->w = NULL;
1433: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1434: if (size == 1) {
1435: PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);
1436: A->ops->setup = MatSetUp_KAIJ;
1437: A->ops->destroy = MatDestroy_SeqKAIJ;
1438: A->ops->view = MatView_KAIJ;
1439: A->ops->mult = MatMult_SeqKAIJ;
1440: A->ops->multadd = MatMultAdd_SeqKAIJ;
1441: A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1442: A->ops->getrow = MatGetRow_SeqKAIJ;
1443: A->ops->restorerow = MatRestoreRow_SeqKAIJ;
1444: A->ops->sor = MatSOR_SeqKAIJ;
1445: } else {
1446: PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);
1447: A->ops->setup = MatSetUp_KAIJ;
1448: A->ops->destroy = MatDestroy_MPIKAIJ;
1449: A->ops->view = MatView_KAIJ;
1450: A->ops->mult = MatMult_MPIKAIJ;
1451: A->ops->multadd = MatMultAdd_MPIKAIJ;
1452: A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1453: A->ops->getrow = MatGetRow_MPIKAIJ;
1454: A->ops->restorerow = MatRestoreRow_MPIKAIJ;
1455: PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);
1456: }
1457: A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1458: return(0);
1459: }