Actual source code: kaij.c
2: /*
3: Defines the basic matrix operations for the KAIJ matrix storage format.
4: This format is used to evaluate matrices of the form:
6: [I \otimes S + A \otimes T]
8: where
9: S is a dense (p \times q) matrix
10: T is a dense (p \times q) matrix
11: A is an AIJ (n \times n) matrix
12: I is the identity matrix
14: The resulting matrix is (np \times nq)
16: We provide:
17: MatMult()
18: MatMultAdd()
19: MatInvertBlockDiagonal()
20: and
21: MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
23: This single directory handles both the sequential and parallel codes
24: */
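/*
   Editorial note (not part of the original kaij.c): the blockwise action that the kernels
   below implement.  If x is split into n blocks x_1,...,x_n of length q, then
   y = [I \otimes S + A \otimes T] x has blocks of length p given by

       y_i = S*x_i + T*(sum_j a_ij*x_j),   i = 1,...,n.

   For example, with p = q = 1, S = (s) and T = (1), the operator is simply s*I + A.
*/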
26: #include <../src/mat/impls/kaij/kaij.h>
27: #include <../src/mat/utils/freespace.h>
28: #include <petsc/private/vecimpl.h>
30: /*@C
31: MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
33: Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
35: Input Parameter:
36: . A - the KAIJ matrix
38: Output Parameter:
39: . B - the AIJ matrix
41: Level: advanced
43: Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
45: .seealso: MatCreateKAIJ()
46: @*/
47: PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B)
48: {
50: PetscBool ismpikaij,isseqkaij;
53: PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);
54: PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);
55: if (ismpikaij) {
56: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
58: *B = b->A;
59: } else if (isseqkaij) {
60: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
62: *B = b->AIJ;
63: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64: return(0);
65: }
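/*
   Usage sketch (editorial addition; K denotes an existing KAIJ matrix): retrieve the
   underlying AIJ matrix in order to inspect it.  Since the reference count is not
   increased, the caller must not destroy the returned matrix.

     Mat B;
     MatKAIJGetAIJ(K,&B);
     MatView(B,PETSC_VIEWER_STDOUT_WORLD);
*/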
67: /*@C
68: MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
70: Not Collective; the entire S is stored and returned independently on all processes.
72: Input Parameter:
73: . A - the KAIJ matrix
75: Output Parameters:
76: + m - the number of rows in S
77: . n - the number of columns in S
78: - S - the S matrix, in form of a scalar array in column-major format
80: Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
82: Level: advanced
84: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85: @*/
86: PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
87: {
88: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
90: if (m) *m = b->p;
91: if (n) *n = b->q;
92: if (S) *S = b->S;
93: return(0);
94: }
96: /*@C
97: MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
99: Not Collective; the entire S is stored and returned independently on all processes.
101: Input Parameter:
102: . A - the KAIJ matrix
104: Output Parameters:
105: + m - the number of rows in S
106: . n - the number of columns in S
107: - S - the S matrix, in form of a scalar array in column-major format
109: Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
111: Level: advanced
113: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114: @*/
115: PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116: {
117: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
119: if (m) *m = b->p;
120: if (n) *n = b->q;
121: if (S) *S = b->S;
122: return(0);
123: }
125: /*@C
126: MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
128: Not collective
130: Input Parameter:
131: . A - the KAIJ matrix
133: Output Parameter:
134: . S - location of pointer to array obtained with MatKAIJGetS()
136: Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137: If NULL is passed, it will not attempt to zero the array pointer.
139: Level: advanced
140: .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141: @*/
142: PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143: {
147: if (S) *S = NULL;
148: PetscObjectStateIncrease((PetscObject)A);
149: return(0);
150: }
152: /*@C
153: MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
155: Not collective
157: Input Parameter:
158: . A - the KAIJ matrix
160: Output Parameter:
161: . S - location of pointer to array obtained with MatKAIJGetSRead()
163: Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
164: If NULL is passed, it will not attempt to zero the array pointer.
166: Level: advanced
167: .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
168: @*/
169: PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
170: {
172: if (S) *S = NULL;
173: return(0);
174: }
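/*
   Usage sketch (editorial addition; K denotes an existing KAIJ matrix): read-only access
   to S is bracketed by the Get/Restore pair, with entry (i,j) stored at S[i + j*p]
   (column-major).

     const PetscScalar *S;
     PetscInt          p,q;
     MatKAIJGetSRead(K,&p,&q,&S);
     ... use S[i + j*p] ...
     MatKAIJRestoreSRead(K,&S);

   MatKAIJGetS()/MatKAIJRestoreS() follow the same pattern but return a writable pointer,
   and the restore call bumps the object state since S may have been modified.
*/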
176: /*@C
177: MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
179: Not Collective; the entire T is stored and returned independently on all processes
181: Input Parameter:
182: . A - the KAIJ matrix
184: Output Parameters:
185: + m - the number of rows in T
186: . n - the number of columns in T
187: - T - the T matrix, in form of a scalar array in column-major format
189: Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
191: Level: advanced
193: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
194: @*/
195: PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
196: {
197: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
199: if (m) *m = b->p;
200: if (n) *n = b->q;
201: if (T) *T = b->T;
202: return(0);
203: }
205: /*@C
206: MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
208: Not Collective; the entire T is stored and returned independently on all processes
210: Input Parameter:
211: . A - the KAIJ matrix
213: Output Parameters:
214: + m - the number of rows in T
215: . n - the number of columns in T
216: - T - the T matrix, in form of a scalar array in column-major format
218: Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
220: Level: advanced
222: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
223: @*/
224: PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
225: {
226: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
228: if (m) *m = b->p;
229: if (n) *n = b->q;
230: if (T) *T = b->T;
231: return(0);
232: }
234: /*@C
235: MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
237: Not collective
239: Input Parameter:
240: . A - the KAIJ matrix
242: Output Parameter:
243: . T - location of pointer to array obtained with MatKAIJGetT()
245: Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
246: If NULL is passed, it will not attempt to zero the array pointer.
248: Level: advanced
249: .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
250: @*/
251: PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
252: {
256: if (T) *T = NULL;
257: PetscObjectStateIncrease((PetscObject)A);
258: return(0);
259: }
261: /*@C
262: MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
264: Not collective
266: Input Parameter:
267: . A - the KAIJ matrix
269: Output Parameter:
270: . T - location of pointer to array obtained with MatKAIJGetTRead()
272: Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
273: If NULL is passed, it will not attempt to zero the array pointer.
275: Level: advanced
276: .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
277: @*/
278: PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
279: {
281: if (T) *T = NULL;
282: return(0);
283: }
285: /*@
286: MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
288: Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
290: Input Parameters:
291: + A - the KAIJ matrix
292: - B - the AIJ matrix
294: Notes:
295: This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
296: Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
298: Level: advanced
300: .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
301: @*/
302: PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
303: {
305: PetscMPIInt size;
308: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
309: if (size == 1) {
310: Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
311: a->AIJ = B;
312: } else {
313: Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
314: a->A = B;
315: }
316: PetscObjectReference((PetscObject)B);
317: return(0);
318: }
320: /*@C
321: MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
323: Logically Collective; the entire S is stored independently on all processes.
325: Input Parameters:
326: + A - the KAIJ matrix
327: . p - the number of rows in S
328: . q - the number of columns in S
329: - S - the S matrix, in form of a scalar array in column-major format
331: Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
332: The S matrix is copied, so the user can destroy this array.
334: Level: advanced
336: .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
337: @*/
338: PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
339: {
341: Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
344: PetscFree(a->S);
345: if (S) {
346: PetscMalloc1(p*q,&a->S); /* PetscMalloc1() takes an element count, not a byte count */
347: PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));
348: } else a->S = NULL;
350: a->p = p;
351: a->q = q;
352: return(0);
353: }
355: /*@C
356: MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
358: Logically Collective.
360: Input Parameter:
361: . A - the KAIJ matrix
363: Output Parameter:
364: . identity - the Boolean value
366: Level: advanced
368: .seealso: MatKAIJGetS(), MatKAIJGetT()
369: @*/
370: PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
371: {
372: Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
373: PetscInt i,j;
376: if (a->p != a->q) {
377: *identity = PETSC_FALSE;
378: return(0);
379: } else *identity = PETSC_TRUE;
380: if (!a->isTI || a->S) {
381: for (i=0; i<a->p && *identity; i++) {
382: for (j=0; j<a->p && *identity; j++) {
383: if (i != j) {
384: if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
385: if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
386: } else {
387: if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
388: if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
389: }
390: }
391: }
392: }
393: return(0);
394: }
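/*
   Usage sketch (editorial addition; K denotes an existing KAIJ matrix): detect the special
   case in which both S and T are scaled identity matrices, for which some algorithms can
   take a simpler path.

     PetscBool id;
     MatKAIJGetScaledIdentity(K,&id);
     if (id) { ... fast path ... }
*/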
396: /*@C
397: MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
399: Logically Collective; the entire T is stored independently on all processes.
401: Input Parameters:
402: + A - the KAIJ matrix
403: . p - the number of rows in T
404: . q - the number of columns in T
405: - T - the T matrix, in form of a scalar array in column-major format
407: Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
408: The T matrix is copied, so the user can destroy this array.
410: Level: advanced
412: .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
413: @*/
414: PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
415: {
417: PetscInt i,j;
418: Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
419: PetscBool isTI = PETSC_FALSE;
422: /* check if T is an identity matrix */
423: if (T && (p == q)) {
424: isTI = PETSC_TRUE;
425: for (i=0; i<p; i++) {
426: for (j=0; j<q; j++) {
427: if (i == j) {
428: /* diagonal term must be 1 */
429: if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
430: } else {
431: /* off-diagonal term must be 0 */
432: if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
433: }
434: }
435: }
436: }
437: a->isTI = isTI;
439: PetscFree(a->T);
440: if (T && (!isTI)) {
441: PetscMalloc1(p*q,&a->T);
442: PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));
443: } else a->T = NULL;
445: a->p = p;
446: a->q = q;
447: return(0);
448: }
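/*
   Assembly sketch (editorial addition): instead of MatCreateKAIJ(), a KAIJ matrix can be
   built incrementally.  The values below are illustrative (p = q = 2, column-major arrays);
   A denotes an existing AIJ matrix, and the generic MATKAIJ type name is assumed to be
   registered to the constructor at the bottom of this file.

     const PetscScalar S[4] = {1.0,0.0,0.0,1.0};
     const PetscScalar T[4] = {0.5,0.0,0.0,0.5};
     Mat               K;
     MatCreate(PetscObjectComm((PetscObject)A),&K);
     MatSetType(K,MATKAIJ);
     MatKAIJSetAIJ(K,A);
     MatKAIJSetS(K,2,2,S);
     MatKAIJSetT(K,2,2,T);
     MatSetUp(K);
*/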
450: PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
451: {
453: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
456: MatDestroy(&b->AIJ);
457: PetscFree(b->S);
458: PetscFree(b->T);
459: PetscFree(b->ibdiag);
460: PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);
461: PetscFree(A->data);
462: return(0);
463: }
465: PetscErrorCode MatSetUp_KAIJ(Mat A)
466: {
468: PetscInt n;
469: PetscMPIInt size;
470: Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data;
473: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
474: if (size == 1) {
475: MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);
476: PetscLayoutSetBlockSize(A->rmap,seqkaij->p);
477: PetscLayoutSetBlockSize(A->cmap,seqkaij->q);
478: PetscLayoutSetUp(A->rmap);
479: PetscLayoutSetUp(A->cmap);
480: } else {
481: Mat_MPIKAIJ *a;
482: Mat_MPIAIJ *mpiaij;
483: IS from,to;
484: Vec gvec;
485: PetscScalar *T;
486: PetscInt i,j;
488: a = (Mat_MPIKAIJ*)A->data;
489: mpiaij = (Mat_MPIAIJ*)a->A->data;
490: MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);
491: PetscLayoutSetBlockSize(A->rmap,seqkaij->p);
492: PetscLayoutSetBlockSize(A->cmap,seqkaij->q);
493: PetscLayoutSetUp(A->rmap);
494: PetscLayoutSetUp(A->cmap);
496: if (a->isTI) {
497: /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
498: * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
499: not be able to tell that the transformation matrix should be set to the identity; thus we create a temporary identity matrix
500: * to pass in. */
501: PetscMalloc1(a->p*a->q,&T);
502: for (i=0; i<a->p; i++) {
503: for (j=0; j<a->q; j++) {
504: if (i==j) T[i+j*a->p] = 1.0;
505: else T[i+j*a->p] = 0.0;
506: }
507: }
508: } else T = a->T;
509: MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);
510: MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);
511: if (a->isTI) {
512: PetscFree(T);
513: }
515: VecGetSize(mpiaij->lvec,&n);
516: VecCreate(PETSC_COMM_SELF,&a->w);
517: VecSetSizes(a->w,n*a->q,n*a->q);
518: VecSetBlockSize(a->w,a->q);
519: VecSetType(a->w,VECSEQ);
521: /* create two temporary index sets used to build the scatter/gather */
522: ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
523: ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);
525: /* create temporary global vector to generate scatter context */
526: VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);
528: /* generate the scatter context */
529: VecScatterCreate(gvec,from,a->w,to,&a->ctx);
531: ISDestroy(&from);
532: ISDestroy(&to);
533: VecDestroy(&gvec);
534: }
536: A->assembled = PETSC_TRUE;
537: return(0);
538: }
540: PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
541: {
542: PetscViewerFormat format;
543: Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
544: Mat B;
545: PetscInt i;
546: PetscErrorCode ierr;
547: PetscBool ismpikaij;
550: PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);
551: PetscViewerGetFormat(viewer,&format);
552: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
553: PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);
555: /* Print appropriate details for S. */
556: if (!a->S) {
557: PetscViewerASCIIPrintf(viewer,"S is NULL\n");
558: } else if (format == PETSC_VIEWER_ASCII_IMPL) {
559: PetscViewerASCIIPrintf(viewer,"Entries of S are ");
560: for (i=0; i<(a->p * a->q); i++) {
561: #if defined(PETSC_USE_COMPLEX)
562: PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));
563: #else
564: PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));
565: #endif
566: }
567: PetscViewerASCIIPrintf(viewer,"\n");
568: }
570: /* Print appropriate details for T. */
571: if (a->isTI) {
572: PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");
573: } else if (!a->T) {
574: PetscViewerASCIIPrintf(viewer,"T is NULL\n");
575: } else if (format == PETSC_VIEWER_ASCII_IMPL) {
576: PetscViewerASCIIPrintf(viewer,"Entries of T are ");
577: for (i=0; i<(a->p * a->q); i++) {
578: #if defined(PETSC_USE_COMPLEX)
579: PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));
580: #else
581: PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));
582: #endif
583: }
584: PetscViewerASCIIPrintf(viewer,"\n");
585: }
587: /* Now print details for the AIJ matrix, using the AIJ viewer. */
588: PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");
589: if (ismpikaij) {
590: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
591: MatView(b->A,viewer);
592: } else {
593: MatView(a->AIJ,viewer);
594: }
596: } else {
597: /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
598: if (ismpikaij) {
599: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
600: } else {
601: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
602: }
603: MatView(B,viewer);
604: MatDestroy(&B);
605: }
606: return(0);
607: }
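/*
   Viewing sketch (editorial addition; K denotes an existing KAIJ matrix): the INFO and IMPL
   ASCII formats print S, T and the underlying AIJ matrix directly; any other format converts
   to AIJ first, as implemented above.

     PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_IMPL);
     MatView(K,PETSC_VIEWER_STDOUT_WORLD);
     PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);
*/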
609: PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
610: {
612: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
615: MatDestroy(&b->AIJ);
616: MatDestroy(&b->OAIJ);
617: MatDestroy(&b->A);
618: VecScatterDestroy(&b->ctx);
619: VecDestroy(&b->w);
620: PetscFree(b->S);
621: PetscFree(b->T);
622: PetscFree(b->ibdiag);
623: PetscFree(A->data);
624: return(0);
625: }
627: /* --------------------------------------------------------------------------------------*/
629: /* zz = yy + Axx */
630: PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
631: {
632: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
633: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
634: const PetscScalar *s = b->S, *t = b->T;
635: const PetscScalar *x,*v,*bx;
636: PetscScalar *y,*sums;
637: PetscErrorCode ierr;
638: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
639: PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k;
642: if (!yy) {
643: VecSet(zz,0.0);
644: } else {
645: VecCopy(yy,zz);
646: }
647: if ((!s) && (!t) && (!b->isTI)) return(0);
649: VecGetArrayRead(xx,&x);
650: VecGetArray(zz,&y);
651: idx = a->j;
652: v = a->a;
653: ii = a->i;
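/* Editorial comment: three cases follow.  If T is the identity, a plain AIJ-style kernel is
   used; if T is a general dense block, each AIJ entry v[jrow+j] scales a T*x_j product; the
   final loop adds the shift term S*x_i to row block i. */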
655: if (b->isTI) {
656: for (i=0; i<m; i++) {
657: jrow = ii[i];
658: n = ii[i+1] - jrow;
659: sums = y + p*i;
660: for (j=0; j<n; j++) {
661: for (k=0; k<p; k++) {
662: sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
663: }
664: }
665: }
666: PetscLogFlops(3.0*(a->nz)*p);
667: } else if (t) {
668: for (i=0; i<m; i++) {
669: jrow = ii[i];
670: n = ii[i+1] - jrow;
671: sums = y + p*i;
672: for (j=0; j<n; j++) {
673: for (k=0; k<p; k++) {
674: for (l=0; l<q; l++) {
675: sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
676: }
677: }
678: }
679: }
680: /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
681: * and also that the T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
682: * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
683: * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
684: PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz);
685: }
686: if (s) {
687: for (i=0; i<m; i++) {
688: sums = y + p*i;
689: bx = x + q*i;
690: if (i < b->AIJ->cmap->n) {
691: for (j=0; j<q; j++) {
692: for (k=0; k<p; k++) {
693: sums[k] += s[k+j*p]*bx[j];
694: }
695: }
696: }
697: }
698: PetscLogFlops(2.0*m*p*q);
699: }
701: VecRestoreArrayRead(xx,&x);
702: VecRestoreArray(zz,&y);
703: return(0);
704: }
706: PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
707: {
710: MatMultAdd_SeqKAIJ(A,xx,NULL,yy);
711: return(0);
712: }
714: #include <petsc/private/kernels/blockinvert.h>
716: PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
717: {
718: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
719: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
720: const PetscScalar *S = b->S;
721: const PetscScalar *T = b->T;
722: const PetscScalar *v = a->a;
723: const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
724: PetscErrorCode ierr;
725: PetscInt i,j,*v_pivots,dof,dof2;
726: PetscScalar *diag,aval,*v_work;
729: if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
730: if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
732: dof = p;
733: dof2 = dof*dof;
735: if (b->ibdiagvalid) {
736: if (values) *values = b->ibdiag;
737: return(0);
738: }
739: if (!b->ibdiag) {
740: PetscMalloc1(dof2*m,&b->ibdiag);
741: PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));
742: }
743: if (values) *values = b->ibdiag;
744: diag = b->ibdiag;
746: PetscMalloc2(dof,&v_work,dof,&v_pivots);
747: for (i=0; i<m; i++) {
748: if (S) {
749: PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));
750: } else {
751: PetscMemzero(diag,dof2*sizeof(PetscScalar));
752: }
753: if (b->isTI) {
754: aval = 0;
755: for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
756: for (j=0; j<dof; j++) diag[j+dof*j] += aval;
757: } else if (T) {
758: aval = 0;
759: for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
760: for (j=0; j<dof2; j++) diag[j] += aval*T[j];
761: }
762: PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);
763: diag += dof2;
764: }
765: PetscFree2(v_work,v_pivots);
767: b->ibdiagvalid = PETSC_TRUE;
768: return(0);
769: }
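/*
   Editorial note: for a KAIJ matrix the i-th diagonal block is S + a_ii*T (p x p), and that
   is what the routine above inverts.  A hedged usage sketch: point-block Jacobi
   preconditioning, which relies on MatInvertBlockDiagonal(); ksp, b and x denote an existing
   KSP and vectors, K the KAIJ matrix.

     PC pc;
     KSPSetOperators(ksp,K,K);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCPBJACOBI);
     KSPSolve(ksp,b,x);
*/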
771: static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
772: {
773: Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
776: *B = kaij->AIJ;
777: return(0);
778: }
780: PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
781: {
782: PetscErrorCode ierr;
783: Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data;
784: Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data;
785: const PetscScalar *aa = a->a, *T = kaij->T, *v;
786: const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
787: const PetscScalar *b, *xb, *idiag;
788: PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt;
789: PetscInt i, j, k, i2, bs, bs2, nz;
792: its = its*lits;
793: if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
794: if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
795: if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
796: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
797: if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
798: else {bs = p; bs2 = bs*bs; }
800: if (!m) return(0);
802: if (!kaij->ibdiagvalid) { MatInvertBlockDiagonal_SeqKAIJ(A,NULL); }
803: idiag = kaij->ibdiag;
804: diag = a->diag;
806: if (!kaij->sor.setup) {
807: PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);
808: kaij->sor.setup = PETSC_TRUE;
809: }
810: y = kaij->sor.y;
811: w = kaij->sor.w;
812: work = kaij->sor.work;
813: t = kaij->sor.t;
814: arr = kaij->sor.arr;
816: VecGetArray(xx,&x);
817: VecGetArrayRead(bb,&b);
819: if (flag & SOR_ZERO_INITIAL_GUESS) {
820: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
821: PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */
822: PetscMemcpy(t,b,bs*sizeof(PetscScalar));
823: i2 = bs;
824: idiag += bs2;
825: for (i=1; i<m; i++) {
826: v = aa + ai[i];
827: vi = aj + ai[i];
828: nz = diag[i] - ai[i];
830: if (T) { /* b - T (Arow * x) */
831: PetscMemzero(w,bs*sizeof(PetscScalar));
832: for (j=0; j<nz; j++) {
833: for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
834: }
835: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
836: for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
837: } else if (kaij->isTI) {
838: PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));
839: for (j=0; j<nz; j++) {
840: for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
841: }
842: } else {
843: PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));
844: }
846: PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
847: for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
849: idiag += bs2;
850: i2 += bs;
851: }
852: /* for logging purposes assume the number of nonzeros in the lower half is 1/2 of the total */
853: PetscLogFlops(1.0*bs2*a->nz);
854: xb = t;
855: } else xb = b;
856: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
857: idiag = kaij->ibdiag+bs2*(m-1);
858: i2 = bs * (m-1);
859: PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
860: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
861: i2 -= bs;
862: idiag -= bs2;
863: for (i=m-2; i>=0; i--) {
864: v = aa + diag[i] + 1;
865: vi = aj + diag[i] + 1;
866: nz = ai[i+1] - diag[i] - 1;
868: if (T) { /* FIXME: This branch untested */
869: PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
870: /* copy all rows of x that are needed into contiguous space */
871: workt = work;
872: for (j=0; j<nz; j++) {
873: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
874: workt += bs;
875: }
876: arrt = arr;
877: for (j=0; j<nz; j++) {
878: PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
879: for (k=0; k<bs2; k++) arrt[k] *= v[j];
880: arrt += bs2;
881: }
882: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
883: } else if (kaij->isTI) {
884: PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));
885: for (j=0; j<nz; j++) {
886: for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
887: }
888: }
890: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
891: for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
893: idiag -= bs2;
894: i2 -= bs;
895: }
896: PetscLogFlops(1.0*bs2*(a->nz));
897: }
898: its--;
899: }
900: while (its--) { /* FIXME: This branch not updated */
901: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
902: i2 = 0;
903: idiag = kaij->ibdiag;
904: for (i=0; i<m; i++) {
905: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
907: v = aa + ai[i];
908: vi = aj + ai[i];
909: nz = diag[i] - ai[i];
910: workt = work;
911: for (j=0; j<nz; j++) {
912: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
913: workt += bs;
914: }
915: arrt = arr;
916: if (T) {
917: for (j=0; j<nz; j++) {
918: PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
919: for (k=0; k<bs2; k++) arrt[k] *= v[j];
920: arrt += bs2;
921: }
922: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
923: } else if (kaij->isTI) {
924: for (j=0; j<nz; j++) {
925: PetscMemzero(arrt,bs2*sizeof(PetscScalar));
926: for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
927: arrt += bs2;
928: }
929: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
930: }
931: PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));
933: v = aa + diag[i] + 1;
934: vi = aj + diag[i] + 1;
935: nz = ai[i+1] - diag[i] - 1;
936: workt = work;
937: for (j=0; j<nz; j++) {
938: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
939: workt += bs;
940: }
941: arrt = arr;
942: if (T) {
943: for (j=0; j<nz; j++) {
944: PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
945: for (k=0; k<bs2; k++) arrt[k] *= v[j];
946: arrt += bs2;
947: }
948: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
949: } else if (kaij->isTI) {
950: for (j=0; j<nz; j++) {
951: PetscMemzero(arrt,bs2*sizeof(PetscScalar));
952: for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
953: arrt += bs2;
954: }
955: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
956: }
958: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
959: for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
961: idiag += bs2;
962: i2 += bs;
963: }
964: xb = t;
965: }
966: else xb = b;
967: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
968: idiag = kaij->ibdiag+bs2*(m-1);
969: i2 = bs * (m-1);
970: if (xb == b) {
971: for (i=m-1; i>=0; i--) {
972: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
974: v = aa + ai[i];
975: vi = aj + ai[i];
976: nz = diag[i] - ai[i];
977: workt = work;
978: for (j=0; j<nz; j++) {
979: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
980: workt += bs;
981: }
982: arrt = arr;
983: if (T) {
984: for (j=0; j<nz; j++) {
985: PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
986: for (k=0; k<bs2; k++) arrt[k] *= v[j];
987: arrt += bs2;
988: }
989: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
990: } else if (kaij->isTI) {
991: for (j=0; j<nz; j++) {
992: PetscMemzero(arrt,bs2*sizeof(PetscScalar));
993: for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
994: arrt += bs2;
995: }
996: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
997: }
999: v = aa + diag[i] + 1;
1000: vi = aj + diag[i] + 1;
1001: nz = ai[i+1] - diag[i] - 1;
1002: workt = work;
1003: for (j=0; j<nz; j++) {
1004: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1005: workt += bs;
1006: }
1007: arrt = arr;
1008: if (T) {
1009: for (j=0; j<nz; j++) {
1010: PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
1011: for (k=0; k<bs2; k++) arrt[k] *= v[j];
1012: arrt += bs2;
1013: }
1014: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1015: } else if (kaij->isTI) {
1016: for (j=0; j<nz; j++) {
1017: PetscMemzero(arrt,bs2*sizeof(PetscScalar));
1018: for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1019: arrt += bs2;
1020: }
1021: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1022: }
1024: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1025: for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1026: }
1027: } else {
1028: for (i=m-1; i>=0; i--) {
1029: PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
1030: v = aa + diag[i] + 1;
1031: vi = aj + diag[i] + 1;
1032: nz = ai[i+1] - diag[i] - 1;
1033: workt = work;
1034: for (j=0; j<nz; j++) {
1035: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1036: workt += bs;
1037: }
1038: arrt = arr;
1039: if (T) {
1040: for (j=0; j<nz; j++) {
1041: PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
1042: for (k=0; k<bs2; k++) arrt[k] *= v[j];
1043: arrt += bs2;
1044: }
1045: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1046: } else if (kaij->isTI) {
1047: for (j=0; j<nz; j++) {
1048: PetscMemzero(arrt,bs2*sizeof(PetscScalar));
1049: for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1050: arrt += bs2;
1051: }
1052: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1053: }
1054: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1055: for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1056: }
1057: }
1058: PetscLogFlops(1.0*bs2*(a->nz));
1059: }
1060: }
1062: VecRestoreArray(xx,&x);
1063: VecRestoreArrayRead(bb,&b);
1064: return(0);
1065: }
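/*
   Editorial note: MatSOR_SeqKAIJ() is reached through the generic MatSOR() interface, for
   example from PCSOR.  A hedged sketch (ksp denotes an existing KSP whose operator is a
   KAIJ matrix):

     PC pc;
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCSOR);
     PCSORSetOmega(pc,1.0);

   Note the restrictions checked above: no Eisenstat variant, no diagonal shift, no
   upper/lower application, and the dense blocks must be square (p == q).
*/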
1067: /*===================================================================================*/
1069: PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1070: {
1071: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
1075: if (!yy) {
1076: VecSet(zz,0.0);
1077: } else {
1078: VecCopy(yy,zz);
1079: }
1080: /* start the scatter */
1081: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
1082: (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);
1083: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
1084: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
1085: return(0);
1086: }
1088: PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1089: {
1092: MatMultAdd_MPIKAIJ(A,xx,NULL,yy);
1093: return(0);
1094: }
1096: PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1097: {
1098: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
1099: PetscErrorCode ierr;
1102: (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);
1103: return(0);
1104: }
1106: /* ----------------------------------------------------------------*/
1108: PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1109: {
1110: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data;
1111: PetscBool diag = PETSC_FALSE;
1112: PetscErrorCode ierr;
1113: PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1114: PetscScalar *vaij,*v,*S=b->S,*T=b->T;
1117: if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1118: b->getrowactive = PETSC_TRUE;
1119: if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
1121: if ((!S) && (!T) && (!b->isTI)) {
1122: if (ncols) *ncols = 0;
1123: if (cols) *cols = NULL;
1124: if (values) *values = NULL;
1125: return(0);
1126: }
1128: if (T || b->isTI) {
1129: MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);
1130: c = nzaij;
1131: for (i=0; i<nzaij; i++) {
1132: /* check if this row contains a diagonal entry */
1133: if (colsaij[i] == r) {
1134: diag = PETSC_TRUE;
1135: c = i;
1136: }
1137: }
1138: } else nzaij = c = 0;
1140: /* calculate size of row */
1141: nz = 0;
1142: if (S) nz += q;
1143: if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1145: if (cols || values) {
1146: PetscMalloc2(nz,&idx,nz,&v);
1147: for (i=0; i<q; i++) {
1148: /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1149: v[i] = 0.0;
1150: }
1151: if (b->isTI) {
1152: for (i=0; i<nzaij; i++) {
1153: for (j=0; j<q; j++) {
1154: idx[i*q+j] = colsaij[i]*q+j;
1155: v[i*q+j] = (j==s ? vaij[i] : 0);
1156: }
1157: }
1158: } else if (T) {
1159: for (i=0; i<nzaij; i++) {
1160: for (j=0; j<q; j++) {
1161: idx[i*q+j] = colsaij[i]*q+j;
1162: v[i*q+j] = vaij[i]*T[s+j*p];
1163: }
1164: }
1165: }
1166: if (S) {
1167: for (j=0; j<q; j++) {
1168: idx[c*q+j] = r*q+j;
1169: v[c*q+j] += S[s+j*p];
1170: }
1171: }
1172: }
1174: if (ncols) *ncols = nz;
1175: if (cols) *cols = idx;
1176: if (values) *values = v;
1177: return(0);
1178: }
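/*
   Usage sketch (editorial addition; K denotes an existing KAIJ matrix): rows of the expanded
   (np x nq) operator can be examined through the generic MatGetRow()/MatRestoreRow()
   interface, which dispatches to the routines in this file.

     PetscInt          ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     MatGetRow(K,row,&ncols,&cols,&vals);
     ... cols/vals describe row 'row' of the expanded operator ...
     MatRestoreRow(K,row,&ncols,&cols,&vals);
*/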
1180: PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1181: {
1185: if (nz) *nz = 0;
1186: PetscFree2(*idx,*v);
1187: ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1188: return(0);
1189: }
1191: PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1192: {
1193: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data;
1194: Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1195: Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1196: Mat AIJ = b->A;
1197: PetscBool diag = PETSC_FALSE;
1198: PetscErrorCode ierr;
1199: const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1200: PetscInt nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1201: PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T;
1204: if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1205: b->getrowactive = PETSC_TRUE;
1206: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1207: lrow = row - rstart;
1209: if ((!S) && (!T) && (!b->isTI)) {
1210: if (ncols) *ncols = 0;
1211: if (cols) *cols = NULL;
1212: if (values) *values = NULL;
1213: return(0);
1214: }
1216: r = lrow/p;
1217: s = lrow%p;
1219: if (T || b->isTI) {
1220: MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
1221: MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);
1222: MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);
1224: c = ncolsaij + ncolsoaij;
1225: for (i=0; i<ncolsaij; i++) {
1226: /* check if this row contains a diagonal entry */
1227: if (colsaij[i] == r) {
1228: diag = PETSC_TRUE;
1229: c = i;
1230: }
1231: }
1232: } else c = 0;
1234: /* calculate size of row */
1235: nz = 0;
1236: if (S) nz += q;
1237: if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1239: if (cols || values) {
1240: PetscMalloc2(nz,&idx,nz,&v);
1241: for (i=0; i<q; i++) {
1242: /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1243: v[i] = 0.0;
1244: }
1245: if (b->isTI) {
1246: for (i=0; i<ncolsaij; i++) {
1247: for (j=0; j<q; j++) {
1248: idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1249: v[i*q+j] = (j==s ? vals[i] : 0.0);
1250: }
1251: }
1252: for (i=0; i<ncolsoaij; i++) {
1253: for (j=0; j<q; j++) {
1254: idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1255: v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0);
1256: }
1257: }
1258: } else if (T) {
1259: for (i=0; i<ncolsaij; i++) {
1260: for (j=0; j<q; j++) {
1261: idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1262: v[i*q+j] = vals[i]*T[s+j*p];
1263: }
1264: }
1265: for (i=0; i<ncolsoaij; i++) {
1266: for (j=0; j<q; j++) {
1267: idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1268: v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p];
1269: }
1270: }
1271: }
1272: if (S) {
1273: for (j=0; j<q; j++) {
1274: idx[c*q+j] = (r+rstart/p)*q+j;
1275: v[c*q+j] += S[s+j*p];
1276: }
1277: }
1278: }
1280: if (ncols) *ncols = nz;
1281: if (cols) *cols = idx;
1282: if (values) *values = v;
1283: return(0);
1284: }
1286: PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1287: {
1290: PetscFree2(*idx,*v);
1291: ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1292: return(0);
1293: }
1295: PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1296: {
1298: Mat A;
1301: MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
1302: MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
1303: MatDestroy(&A);
1304: return(0);
1305: }
1307: /* ---------------------------------------------------------------------------------- */
1308: /*@C
1309: MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1311: [I \otimes S + A \otimes T]
1313: where
1314: S is a dense (p \times q) matrix
1315: T is a dense (p \times q) matrix
1316: A is an AIJ (n \times n) matrix
1317: I is the identity matrix
1318: The resulting matrix is (np \times nq)
1320: S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1322: Collective
1324: Input Parameters:
1325: + A - the AIJ matrix
1326: . p - number of rows in S and T
1327: . q - number of columns in S and T
1328: . S - the S matrix (can be NULL), stored as a PetscScalar array (column-major)
1329: - T - the T matrix (can be NULL), stored as a PetscScalar array (column-major)
1331: Output Parameter:
1332: . kaij - the new KAIJ matrix
1334: Notes:
1335: This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1336: Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1338: Level: advanced
1340: .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1341: @*/
1342: PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1343: {
1345: PetscMPIInt size;
1348: MatCreate(PetscObjectComm((PetscObject)A),kaij);
1349: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1350: if (size == 1) {
1351: MatSetType(*kaij,MATSEQKAIJ);
1352: } else {
1353: MatSetType(*kaij,MATMPIKAIJ);
1354: }
1355: MatKAIJSetAIJ(*kaij,A);
1356: MatKAIJSetS(*kaij,p,q,S);
1357: MatKAIJSetT(*kaij,p,q,T);
1358: MatSetUp(*kaij);
1359: return(0);
1360: }
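/*
   Usage sketch (editorial addition): build a KAIJ matrix from an existing AIJ matrix A with
   illustrative 2x2 blocks S and T (column-major arrays) and apply it to a vector.

     const PetscScalar S[4] = {4.0,0.0,0.0,4.0};
     const PetscScalar T[4] = {1.0,0.0,0.0,1.0};
     Mat               K;
     Vec               x,y;
     MatCreateKAIJ(A,2,2,S,T,&K);
     MatCreateVecs(K,&x,&y);
     VecSet(x,1.0);
     MatMult(K,x,y);
     VecDestroy(&x); VecDestroy(&y); MatDestroy(&K);
*/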
1362: /*MC
1363: MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1364: [I \otimes S + A \otimes T],
1365: where
1366: S is a dense (p \times q) matrix,
1367: T is a dense (p \times q) matrix,
1368: A is an AIJ (n \times n) matrix,
1369: and I is the identity matrix.
1370: The resulting matrix is (np \times nq).
1372: S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1374: Notes:
1375: A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1376: where x and b are column vectors containing the row-major representations of X and B.
1378: Level: advanced
1380: .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1381: M*/
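/*
   Editorial sketch of the multiple right-hand-side use mentioned above: for AX = B with k
   right-hand sides, the operator (A \otimes I_k) can be created by passing S = NULL and an
   explicit k x k identity for T (column-major); k, A and K are assumed to exist, and T is
   copied by MatCreateKAIJ() so it can be freed afterwards.

     PetscScalar *T;
     PetscInt    i;
     PetscCalloc1(k*k,&T);
     for (i=0; i<k; i++) T[i*(k+1)] = 1.0;
     MatCreateKAIJ(A,k,k,NULL,T,&K);
     PetscFree(T);
*/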
1383: PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1384: {
1386: Mat_MPIKAIJ *b;
1387: PetscMPIInt size;
1390: PetscNewLog(A,&b);
1391: A->data = (void*)b;
1393: PetscMemzero(A->ops,sizeof(struct _MatOps));
1395: A->ops->setup = MatSetUp_KAIJ;
1397: b->w = NULL;
1398: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1399: if (size == 1) {
1400: PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);
1401: A->ops->setup = MatSetUp_KAIJ;
1402: A->ops->destroy = MatDestroy_SeqKAIJ;
1403: A->ops->view = MatView_KAIJ;
1404: A->ops->mult = MatMult_SeqKAIJ;
1405: A->ops->multadd = MatMultAdd_SeqKAIJ;
1406: A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1407: A->ops->getrow = MatGetRow_SeqKAIJ;
1408: A->ops->restorerow = MatRestoreRow_SeqKAIJ;
1409: A->ops->sor = MatSOR_SeqKAIJ;
1410: } else {
1411: PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);
1412: A->ops->setup = MatSetUp_KAIJ;
1413: A->ops->destroy = MatDestroy_MPIKAIJ;
1414: A->ops->view = MatView_KAIJ;
1415: A->ops->mult = MatMult_MPIKAIJ;
1416: A->ops->multadd = MatMultAdd_MPIKAIJ;
1417: A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1418: A->ops->getrow = MatGetRow_MPIKAIJ;
1419: A->ops->restorerow = MatRestoreRow_MPIKAIJ;
1420: PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);
1421: }
1422: A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1423: return(0);
1424: }