/*
  Defines the basic matrix operations for the KAIJ matrix storage format.
  This format is used to evaluate matrices of the form:

    [I \otimes S + A \otimes T]

  where
    S is a dense (p \times q) matrix
    T is a dense (p \times q) matrix
    A is an AIJ  (n \times n) matrix
    I is the identity matrix

  The resulting matrix is (np \times nq).

  We provide:
    MatMult()
    MatMultAdd()
    MatInvertBlockDiagonal()
  and
    MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)

  This single directory handles both the sequential and parallel codes.
*/
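
/*
  Blockwise, for a vector x partitioned into q-length blocks x_j, block row i of the
  product y = [I \otimes S + A \otimes T] x is

    y_i = S x_i + \sum_j A_{ij} (T x_j)

  so S acts as a per-block shift and T transforms each neighbor contribution; this is
  the form implemented by MatMultAdd_SeqKAIJ() below.
*/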
#include <../src/mat/impls/kaij/kaij.h>
#include <../src/mat/utils/freespace.h>
#include <petsc/private/vecimpl.h>
/*@C
   MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix

   Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameter:
.  B - the AIJ matrix

   Level: advanced

   Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.

.seealso: MatCreateKAIJ()
@*/
PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B)
{
  PetscBool ismpikaij,isseqkaij;

  PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);
  PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);
  if (ismpikaij) {
    Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
    *B = b->A;
  } else if (isseqkaij) {
    Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
    *B = b->AIJ;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
  return(0);
}
/*@C
   MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix

   Not Collective; the entire S is stored and returned independently on all processes.

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameters:
+  m - the number of rows in S
.  n - the number of columns in S
-  S - the S matrix, in form of a scalar array in column-major format

   Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

   Level: advanced

.seealso: MatCreateKAIJ(), MatGetBlockSizes()
@*/
PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;

  if (m) *m = b->p;
  if (n) *n = b->q;
  if (S) *S = b->S;
  return(0);
}
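
/* Example usage of the getter/restore pair (a minimal sketch; `kaij` is assumed to be
   an assembled KAIJ matrix):

   .vb
     PetscInt    p,q;
     PetscScalar *S;
     MatKAIJGetS(kaij,&p,&q,&S);
     S[0] += 1.0;              // S is column-major: entry (i,j) is S[i+j*p]
     MatKAIJRestoreS(kaij,&S); // zeroes the pointer and increases the object state
   .ve
*/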
/*@C
   MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix

   Not Collective; the entire S is stored and returned independently on all processes.

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameters:
+  m - the number of rows in S
.  n - the number of columns in S
-  S - the S matrix, in form of a scalar array in column-major format

   Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

   Level: advanced

.seealso: MatCreateKAIJ(), MatGetBlockSizes()
@*/
PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;

  if (m) *m = b->p;
  if (n) *n = b->q;
  if (S) *S = b->S;
  return(0);
}
/*@C
   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()

   Not collective

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameter:
.  S - location of pointer to array obtained with MatKAIJGetS()

   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
   If NULL is passed, it will not attempt to zero the array pointer.

   Level: advanced

.seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
@*/
PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
{
  if (S) *S = NULL;
  PetscObjectStateIncrease((PetscObject)A);
  return(0);
}
/*@C
   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()

   Not collective

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameter:
.  S - location of pointer to array obtained with MatKAIJGetSRead()

   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
   If NULL is passed, it will not attempt to zero the array pointer.

   Level: advanced

.seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreS()
@*/
PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
{
  if (S) *S = NULL;
  return(0);
}
/*@C
   MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix

   Not Collective; the entire T is stored and returned independently on all processes.

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameters:
+  m - the number of rows in T
.  n - the number of columns in T
-  T - the T matrix, in form of a scalar array in column-major format

   Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

   Level: advanced

.seealso: MatCreateKAIJ(), MatGetBlockSizes()
@*/
PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;

  if (m) *m = b->p;
  if (n) *n = b->q;
  if (T) *T = b->T;
  return(0);
}
/*@C
   MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix

   Not Collective; the entire T is stored and returned independently on all processes.

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameters:
+  m - the number of rows in T
.  n - the number of columns in T
-  T - the T matrix, in form of a scalar array in column-major format

   Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

   Level: advanced

.seealso: MatCreateKAIJ(), MatGetBlockSizes()
@*/
PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;

  if (m) *m = b->p;
  if (n) *n = b->q;
  if (T) *T = b->T;
  return(0);
}
/*@C
   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()

   Not collective

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameter:
.  T - location of pointer to array obtained with MatKAIJGetT()

   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
   If NULL is passed, it will not attempt to zero the array pointer.

   Level: advanced

.seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
@*/
PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
{
  if (T) *T = NULL;
  PetscObjectStateIncrease((PetscObject)A);
  return(0);
}
/*@C
   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()

   Not collective

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameter:
.  T - location of pointer to array obtained with MatKAIJGetTRead()

   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
   If NULL is passed, it will not attempt to zero the array pointer.

   Level: advanced

.seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreT()
@*/
PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
{
  if (T) *T = NULL;
  return(0);
}
/*@
   MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix

   Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel

   Input Parameters:
+  A - the KAIJ matrix
-  B - the AIJ matrix

   Notes:
   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.

   Level: advanced

.seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
@*/
PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
{
  PetscMPIInt size;

  MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
  if (size == 1) {
    Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
    a->AIJ = B;
  } else {
    Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
    a->A = B;
  }
  PetscObjectReference((PetscObject)B);
  return(0);
}
/*@C
   MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix

   Logically Collective; the entire S is stored independently on all processes.

   Input Parameters:
+  A - the KAIJ matrix
.  p - the number of rows in S
.  q - the number of columns in S
-  S - the S matrix, in form of a scalar array in column-major format

   Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
   The S matrix is copied, so the user can destroy this array.

   Level: advanced

.seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
@*/
PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
{
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;

  PetscFree(a->S);
  if (S) {
    PetscMalloc1(p*q,&a->S);
    PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));
  } else a->S = NULL;

  a->p = p;
  a->q = q;
  return(0);
}
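
/* Example (a sketch with illustrative values): the 2x2 shift matrix
     S = [ 1 3 ]
         [ 2 4 ]
   is passed as the column-major array {1,2,3,4}:

   .vb
     const PetscScalar S[] = {1.0,2.0,3.0,4.0};
     MatKAIJSetS(kaij,2,2,S); // S is copied, so a stack array is fine
   .ve
*/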
/*@C
   MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix

   Logically Collective; the entire T is stored independently on all processes.

   Input Parameters:
+  A - the KAIJ matrix
.  p - the number of rows in T
.  q - the number of columns in T
-  T - the T matrix, in form of a scalar array in column-major format

   Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
   The T matrix is copied, so the user can destroy this array.

   Level: advanced

.seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
@*/
PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
{
  PetscInt    i,j;
  Mat_SeqKAIJ *a    = (Mat_SeqKAIJ*)A->data;
  PetscBool   isTI  = PETSC_FALSE;

  /* check if T is an identity matrix */
  if (T && (p == q)) {
    isTI = PETSC_TRUE;
    for (i=0; i<p; i++) {
      for (j=0; j<q; j++) {
        if (i == j) {
          /* diagonal term must be 1 */
          if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
        } else {
          /* off-diagonal term must be 0 */
          if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
        }
      }
    }
  }
  a->isTI = isTI;

  PetscFree(a->T);
  if (T && (!isTI)) {
    PetscMalloc1(p*q,&a->T);
    PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));
  } else a->T = NULL;

  a->p = p;
  a->q = q;
  return(0);
}
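
/* Note on the identity check above: passing an explicit p x p identity array for T
   stores nothing (a->T stays NULL) and sets a->isTI, enabling the cheaper code paths
   in MatMult/MatSOR below. Passing T = NULL instead means "T is zero", not "T is the
   identity". A sketch:

   .vb
     const PetscScalar Tid[] = {1.0,0.0,0.0,1.0}; // 2x2 identity, column-major
     MatKAIJSetT(kaij,2,2,Tid);                   // detected as identity; not stored
   .ve
*/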
PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;

  MatDestroy(&b->AIJ);
  PetscFree(b->S);
  PetscFree(b->T);
  PetscFree(b->ibdiag);
  PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);
  PetscFree(A->data);
  return(0);
}
PetscErrorCode MatSetUp_KAIJ(Mat A)
{
  PetscInt    n;
  PetscMPIInt size;
  Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data;

  MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
  if (size == 1) {
    MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);
    PetscLayoutSetBlockSize(A->rmap,seqkaij->p);
    PetscLayoutSetBlockSize(A->cmap,seqkaij->q);
    PetscLayoutSetUp(A->rmap);
    PetscLayoutSetUp(A->cmap);
  } else {
    Mat_MPIKAIJ *a;
    Mat_MPIAIJ  *mpiaij;
    IS          from,to;
    Vec         gvec;
    PetscScalar *T;
    PetscInt    i,j;

    a      = (Mat_MPIKAIJ*)A->data;
    mpiaij = (Mat_MPIAIJ*)a->A->data;
    MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);
    PetscLayoutSetBlockSize(A->rmap,seqkaij->p);
    PetscLayoutSetBlockSize(A->cmap,seqkaij->q);
    PetscLayoutSetUp(A->rmap);
    PetscLayoutSetUp(A->cmap);

    if (a->isTI) {
      /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
       * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
       * not be able to tell that the transformation matrix should be set to the identity; thus we create a temporary identity matrix
       * to pass in. */
      PetscMalloc1(a->p*a->q,&T);
      for (i=0; i<a->p; i++) {
        for (j=0; j<a->q; j++) {
          if (i==j) T[i+j*a->p] = 1.0;
          else      T[i+j*a->p] = 0.0;
        }
      }
    } else T = a->T;
    MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);
    MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);
    if (a->isTI) {
      PetscFree(T);
    }

    VecGetSize(mpiaij->lvec,&n);
    VecCreate(PETSC_COMM_SELF,&a->w);
    VecSetSizes(a->w,n*a->q,n*a->q);
    VecSetBlockSize(a->w,a->q);
    VecSetType(a->w,VECSEQ);

    /* create two temporary index sets used to build the scatter-gather */
    ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
    ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);

    /* create a temporary global vector to generate the scatter context */
    VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);

    /* generate the scatter context */
    VecScatterCreate(gvec,from,a->w,to,&a->ctx);

    ISDestroy(&from);
    ISDestroy(&to);
    VecDestroy(&gvec);
  }

  A->assembled = PETSC_TRUE;
  return(0);
}
PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
{
  PetscViewerFormat format;
  Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
  Mat               B;
  PetscInt          i;
  PetscBool         ismpikaij;

  PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);
  PetscViewerGetFormat(viewer,&format);
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
    PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);

    /* Print appropriate details for S. */
    if (!a->S) {
      PetscViewerASCIIPrintf(viewer,"S is NULL\n");
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscViewerASCIIPrintf(viewer,"Entries of S are ");
      for (i=0; i<(a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));
#else
        PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));
#endif
      }
      PetscViewerASCIIPrintf(viewer,"\n");
    }

    /* Print appropriate details for T. */
    if (a->isTI) {
      PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");
    } else if (!a->T) {
      PetscViewerASCIIPrintf(viewer,"T is NULL\n");
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscViewerASCIIPrintf(viewer,"Entries of T are ");
      for (i=0; i<(a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));
#else
        PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));
#endif
      }
      PetscViewerASCIIPrintf(viewer,"\n");
    }

    /* Now print details for the AIJ matrix, using the AIJ viewer. */
    PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
      MatView(b->A,viewer);
    } else {
      MatView(a->AIJ,viewer);
    }
  } else {
    /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
    if (ismpikaij) {
      MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
    } else {
      MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
    }
    MatView(B,viewer);
    MatDestroy(&B);
  }
  return(0);
}
PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
{
  Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;

  MatDestroy(&b->AIJ);
  MatDestroy(&b->OAIJ);
  MatDestroy(&b->A);
  VecScatterDestroy(&b->ctx);
  VecDestroy(&b->w);
  PetscFree(b->S);
  PetscFree(b->T);
  PetscFree(b->ibdiag);
  PetscFree(A->data);
  return(0);
}

/* --------------------------------------------------------------------------------------*/

/* zz = yy + A*xx */
PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
  const PetscScalar *s = b->S, *t = b->T;
  const PetscScalar *x,*v,*bx;
  PetscScalar       *y,*sums;
  const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
  PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;

  if (!yy) {
    VecSet(zz,0.0);
  } else {
    VecCopy(yy,zz);
  }
  if ((!s) && (!t) && (!b->isTI)) return(0);

  VecGetArrayRead(xx,&x);
  VecGetArray(zz,&y);
  idx = a->j;
  v   = a->a;
  ii  = a->i;
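
  /* Three cases follow: T is the identity (apply the AIJ entries directly, p values per
     block), general dense T (a full p x q kernel applied per AIJ entry), and finally an
     optional S term that adds S*x_i to block row i. */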
  if (b->isTI) {
    for (i=0; i<m; i++) {
      jrow = ii[i];
      n    = ii[i+1] - jrow;
      sums = y + p*i;
      for (j=0; j<n; j++) {
        for (k=0; k<p; k++) {
          sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
        }
      }
    }
    PetscLogFlops((a->nz)*3*p);
  } else if (t) {
    for (i=0; i<m; i++) {
      jrow = ii[i];
      n    = ii[i+1] - jrow;
      sums = y + p*i;
      for (j=0; j<n; j++) {
        for (k=0; k<p; k++) {
          for (l=0; l<q; l++) {
            sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
          }
        }
      }
    }
    /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
     * and also that the T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
     * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
     * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
    PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);
  }
  if (s) {
    for (i=0; i<m; i++) {
      sums = y + p*i;
      bx   = x + q*i;
      if (i < b->AIJ->cmap->n) {
        for (j=0; j<q; j++) {
          for (k=0; k<p; k++) {
            sums[k] += s[k+j*p]*bx[j];
          }
        }
      }
    }
    PetscLogFlops(m*2*p*q);
  }

  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(zz,&y);
  return(0);
}
PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
{
  MatMultAdd_SeqKAIJ(A,xx,NULL,yy);
  return(0);
}

#include <petsc/private/kernels/blockinvert.h>
PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
  const PetscScalar *S = b->S;
  const PetscScalar *T = b->T;
  const PetscScalar *v = a->a;
  const PetscInt    p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
  PetscInt          i,j,*v_pivots,dof,dof2;
  PetscScalar       *diag,aval,*v_work;

  if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
  if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");

  dof  = p;
  dof2 = dof*dof;

  if (b->ibdiagvalid) {
    if (values) *values = b->ibdiag;
    return(0);
  }
  if (!b->ibdiag) {
    PetscMalloc1(dof2*m,&b->ibdiag);
    PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));
  }
  if (values) *values = b->ibdiag;
  diag = b->ibdiag;

  PetscMalloc2(dof,&v_work,dof,&v_pivots);
  for (i=0; i<m; i++) {
    if (S) {
      PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));
    } else {
      PetscMemzero(diag,dof2*sizeof(PetscScalar));
    }
    if (b->isTI) {
      aval = 0;
      for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
      for (j=0; j<dof; j++) diag[j+dof*j] += aval;
    } else if (T) {
      aval = 0;
      for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
      for (j=0; j<dof2; j++) diag[j] += aval*T[j];
    }
    PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);
    diag += dof2;
  }
  PetscFree2(v_work,v_pivots);

  b->ibdiagvalid = PETSC_TRUE;
  return(0);
}
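
/* The i-th block inverted above is D_i = S + a_{ii} T (or S + a_{ii} I when isTI is
   set), where a_{ii} is the diagonal entry of row i of the AIJ matrix;
   PetscKernel_A_gets_inverse_A() then factors and inverts it in place. */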
static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
{
  Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;

  *B = kaij->AIJ;
  return(0);
}
PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
  Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)kaij->AIJ->data;
  const PetscScalar *aa   = a->a, *T = kaij->T, *v;
  const PetscInt    m     = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
  const PetscScalar *b, *xb, *idiag;
  PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
  PetscInt          i, j, k, i2, bs, bs2, nz;

  its = its*lits;
  if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
  if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
  if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
  if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
  else { bs = p; bs2 = bs*bs; }

  if (!m) return(0);

  if (!kaij->ibdiagvalid) { MatInvertBlockDiagonal_SeqKAIJ(A,NULL); }
  idiag = kaij->ibdiag;
  diag  = a->diag;

  if (!kaij->sor.setup) {
    PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);
    kaij->sor.setup = PETSC_TRUE;
  }
  y    = kaij->sor.y;
  w    = kaij->sor.w;
  work = kaij->sor.work;
  t    = kaij->sor.t;
  arr  = kaij->sor.arr;
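
  /* Each sweep below applies a block SOR update: for block row i (block size bs = p),
     with D_i = S + a_{ii} T the diagonal block inverted above,

       x_i <- (1-omega) x_i + omega D_i^{-1} (b_i - sum_{j<i} a_{ij} T x_j - sum_{j>i} a_{ij} T x_j)

     where the forward sweep uses the strictly lower triangular part and the backward
     sweep the strictly upper part; t[] caches b minus the lower-triangular product so
     a following backward sweep can reuse it. */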
  VecGetArray(xx,&x);
  VecGetArrayRead(bb,&b);

  if (flag & SOR_ZERO_INITIAL_GUESS) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */
      PetscMemcpy(t,b,bs*sizeof(PetscScalar));
      i2     = bs;
      idiag += bs2;
      for (i=1; i<m; i++) {
        v  = aa + ai[i];
        vi = aj + ai[i];
        nz = diag[i] - ai[i];

        if (T) { /* b - T (Arow * x) */
          PetscMemzero(w,bs*sizeof(PetscScalar));
          for (j=0; j<nz; j++) {
            for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
          for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
        } else if (kaij->isTI) {
          PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));
          for (j=0; j<nz; j++) {
            for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
          }
        } else {
          PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));
        }

        PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
        for (j=0; j<bs; j++) x[i2+j] = omega * y[j];

        idiag += bs2;
        i2    += bs;
      }
      /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
      PetscLogFlops(1.0*bs2*a->nz);
      xb = t;
    } else xb = b;
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      idiag = kaij->ibdiag+bs2*(m-1);
      i2    = bs * (m-1);
      PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
      PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
      i2    -= bs;
      idiag -= bs2;
      for (i=m-2; i>=0; i--) {
        v  = aa + diag[i] + 1;
        vi = aj + diag[i] + 1;
        nz = ai[i+1] - diag[i] - 1;

        if (T) { /* FIXME: This branch untested */
          PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
          /* copy all rows of x that are needed into contiguous space */
          workt = work;
          for (j=0; j<nz; j++) {
            PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
            workt += bs;
          }
          arrt = arr;
          for (j=0; j<nz; j++) {
            PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
            for (k=0; k<bs2; k++) arrt[k] *= v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
        } else if (kaij->isTI) {
          PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));
          for (j=0; j<nz; j++) {
            for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
          }
        }

        PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
        for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];

        idiag -= bs2;
        i2    -= bs;
      }
      PetscLogFlops(1.0*bs2*(a->nz));
    }
    its--;
  }
  while (its--) { /* FIXME: This branch not updated */
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      i2    = 0;
      idiag = kaij->ibdiag;
      for (i=0; i<m; i++) {
        PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));

        v     = aa + ai[i];
        vi    = aj + ai[i];
        nz    = diag[i] - ai[i];
        workt = work;
        for (j=0; j<nz; j++) {
          PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
          workt += bs;
        }
        arrt = arr;
        if (T) {
          for (j=0; j<nz; j++) {
            PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
            for (k=0; k<bs2; k++) arrt[k] *= v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
        } else if (kaij->isTI) {
          for (j=0; j<nz; j++) {
            PetscMemzero(arrt,bs2*sizeof(PetscScalar));
            for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
        }
        PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));

        v     = aa + diag[i] + 1;
        vi    = aj + diag[i] + 1;
        nz    = ai[i+1] - diag[i] - 1;
        workt = work;
        for (j=0; j<nz; j++) {
          PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
          workt += bs;
        }
        arrt = arr;
        if (T) {
          for (j=0; j<nz; j++) {
            PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
            for (k=0; k<bs2; k++) arrt[k] *= v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
        } else if (kaij->isTI) {
          for (j=0; j<nz; j++) {
            PetscMemzero(arrt,bs2*sizeof(PetscScalar));
            for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
        }

        PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
        for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);

        idiag += bs2;
        i2    += bs;
      }
      xb = t;
    }
    else xb = b;
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      idiag = kaij->ibdiag+bs2*(m-1);
      i2    = bs * (m-1);
      if (xb == b) {
        for (i=m-1; i>=0; i--) {
          PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));

          v     = aa + ai[i];
          vi    = aj + ai[i];
          nz    = diag[i] - ai[i];
          workt = work;
          for (j=0; j<nz; j++) {
            PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
            workt += bs;
          }
          arrt = arr;
          if (T) {
            for (j=0; j<nz; j++) {
              PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
              for (k=0; k<bs2; k++) arrt[k] *= v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
          } else if (kaij->isTI) {
            for (j=0; j<nz; j++) {
              PetscMemzero(arrt,bs2*sizeof(PetscScalar));
              for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
          }

          v     = aa + diag[i] + 1;
          vi    = aj + diag[i] + 1;
          nz    = ai[i+1] - diag[i] - 1;
          workt = work;
          for (j=0; j<nz; j++) {
            PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
            workt += bs;
          }
          arrt = arr;
          if (T) {
            for (j=0; j<nz; j++) {
              PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
              for (k=0; k<bs2; k++) arrt[k] *= v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
          } else if (kaij->isTI) {
            for (j=0; j<nz; j++) {
              PetscMemzero(arrt,bs2*sizeof(PetscScalar));
              for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
          }

          PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
          for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);

          idiag -= bs2;
          i2    -= bs;
        }
      } else {
        for (i=m-1; i>=0; i--) {
          PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
          v     = aa + diag[i] + 1;
          vi    = aj + diag[i] + 1;
          nz    = ai[i+1] - diag[i] - 1;
          workt = work;
          for (j=0; j<nz; j++) {
            PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
            workt += bs;
          }
          arrt = arr;
          if (T) {
            for (j=0; j<nz; j++) {
              PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
              for (k=0; k<bs2; k++) arrt[k] *= v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
          } else if (kaij->isTI) {
            for (j=0; j<nz; j++) {
              PetscMemzero(arrt,bs2*sizeof(PetscScalar));
              for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
          }
          PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
          for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);

          idiag -= bs2;
          i2    -= bs;
        }
      }
      PetscLogFlops(1.0*bs2*(a->nz));
    }
  }

  VecRestoreArray(xx,&x);
  VecRestoreArrayRead(bb,&b);
  return(0);
}
/*===================================================================================*/

PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;

  if (!yy) {
    VecSet(zz,0.0);
  } else {
    VecCopy(yy,zz);
  }
  /* start the scatter */
  VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
  (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);
  VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
  (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
  return(0);
}

PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
{
  MatMultAdd_MPIKAIJ(A,xx,NULL,yy);
  return(0);
}
PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;

  (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);
  return(0);
}

/* ----------------------------------------------------------------*/
PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
{
  Mat_SeqKAIJ *b    = (Mat_SeqKAIJ*) A->data;
  PetscBool   diag  = PETSC_FALSE;
  PetscInt    nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
  PetscScalar *vaij,*v,*S=b->S,*T=b->T;

  if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  b->getrowactive = PETSC_TRUE;
  if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);

  if ((!S) && (!T) && (!b->isTI)) {
    if (ncols) *ncols = 0;
    if (cols) *cols = NULL;
    if (values) *values = NULL;
    return(0);
  }
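
  /* Row indexing: KAIJ row `row` is row s = row%p of the p x q block in block row
     r = row/p; each AIJ nonzero in row r of A contributes the q column indices
     colsaij[i]*q+j to this row, and S contributes to the diagonal block only. */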
  if (T || b->isTI) {
    MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);
    c = nzaij;
    for (i=0; i<nzaij; i++) {
      /* check if this row contains a diagonal entry */
      if (colsaij[i] == r) {
        diag = PETSC_TRUE;
        c    = i;
      }
    }
  } else nzaij = c = 0;

  /* calculate size of row */
  nz = 0;
  if (S) nz += q;
  if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);

  if (cols || values) {
    PetscMalloc2(nz,&idx,nz,&v);
    for (i=0; i<q; i++) {
      /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
      v[i] = 0.0;
    }
    if (b->isTI) {
      for (i=0; i<nzaij; i++) {
        for (j=0; j<q; j++) {
          idx[i*q+j] = colsaij[i]*q+j;
          v[i*q+j]   = (j==s ? vaij[i] : 0);
        }
      }
    } else if (T) {
      for (i=0; i<nzaij; i++) {
        for (j=0; j<q; j++) {
          idx[i*q+j] = colsaij[i]*q+j;
          v[i*q+j]   = vaij[i]*T[s+j*p];
        }
      }
    }
    if (S) {
      for (j=0; j<q; j++) {
        idx[c*q+j] = r*q+j;
        v[c*q+j]  += S[s+j*p];
      }
    }
  }

  if (ncols) *ncols = nz;
  if (cols) *cols = idx;
  if (values) *values = v;
  return(0);
}
PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscFree2(*idx,*v);
  ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
  return(0);
}
PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
{
  Mat_MPIKAIJ    *b      = (Mat_MPIKAIJ*) A->data;
  Mat            MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
  Mat            MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
  Mat            AIJ     = b->A;
  PetscBool      diag    = PETSC_FALSE;
  const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
  PetscInt       nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
  PetscScalar    *v,*vals,*ovals,*S=b->S,*T=b->T;

  if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  b->getrowactive = PETSC_TRUE;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  if ((!S) && (!T) && (!b->isTI)) {
    if (ncols) *ncols = 0;
    if (cols) *cols = NULL;
    if (values) *values = NULL;
    return(0);
  }

  r = lrow/p;
  s = lrow%p;

  if (T || b->isTI) {
    MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
    MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);
    MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);

    c = ncolsaij + ncolsoaij;
    for (i=0; i<ncolsaij; i++) {
      /* check if this row contains a diagonal entry */
      if (colsaij[i] == r) {
        diag = PETSC_TRUE;
        c    = i;
      }
    }
  } else c = 0;

  /* calculate size of row */
  nz = 0;
  if (S) nz += q;
  if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);

  if (cols || values) {
    PetscMalloc2(nz,&idx,nz,&v);
    for (i=0; i<q; i++) {
      /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
      v[i] = 0.0;
    }
    if (b->isTI) {
      for (i=0; i<ncolsaij; i++) {
        for (j=0; j<q; j++) {
          idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
          v[i*q+j]   = (j==s ? vals[i] : 0.0);
        }
      }
      for (i=0; i<ncolsoaij; i++) {
        for (j=0; j<q; j++) {
          idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
          v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i] : 0.0);
        }
      }
    } else if (T) {
      for (i=0; i<ncolsaij; i++) {
        for (j=0; j<q; j++) {
          idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
          v[i*q+j]   = vals[i]*T[s+j*p];
        }
      }
      for (i=0; i<ncolsoaij; i++) {
        for (j=0; j<q; j++) {
          idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
          v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
        }
      }
    }
    if (S) {
      for (j=0; j<q; j++) {
        idx[c*q+j] = (r+rstart/p)*q+j;
        v[c*q+j]  += S[s+j*p];
      }
    }
  }

  if (ncols) *ncols = nz;
  if (cols) *cols = idx;
  if (values) *values = v;
  return(0);
}
PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscFree2(*idx,*v);
  ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
  return(0);
}
PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
{
  Mat A;

  MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
  MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
  MatDestroy(&A);
  return(0);
}

/* ---------------------------------------------------------------------------------- */
/*@C
   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:

     [I \otimes S + A \otimes T]

   where
     S is a dense (p \times q) matrix
     T is a dense (p \times q) matrix
     A is an AIJ  (n \times n) matrix
     I is the identity matrix

   The resulting matrix is (np \times nq).

   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.

   Collective

   Input Parameters:
+  A - the AIJ matrix
.  p - number of rows in S and T
.  q - number of columns in S and T
.  S - the S matrix (can be NULL), stored as a PetscScalar array (column-major)
-  T - the T matrix (can be NULL), stored as a PetscScalar array (column-major)

   Output Parameter:
.  kaij - the new KAIJ matrix

   Notes:
   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.

   Level: advanced

.seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
@*/
PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
{
  PetscMPIInt size;

  MatCreate(PetscObjectComm((PetscObject)A),kaij);
  MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
  if (size == 1) {
    MatSetType(*kaij,MATSEQKAIJ);
  } else {
    MatSetType(*kaij,MATMPIKAIJ);
  }
  MatKAIJSetAIJ(*kaij,A);
  MatKAIJSetS(*kaij,p,q,S);
  MatKAIJSetT(*kaij,p,q,T);
  MatSetUp(*kaij);
  return(0);
}
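
/* Example usage (a self-contained sketch; sizes and values are illustrative only):

   .vb
     Mat         A,K;          // A is any assembled (MPI)AIJ matrix created beforehand
     Vec         x,y;
     PetscScalar S[4] = {4.0,1.0,1.0,4.0}; // 2x2, column-major
     PetscScalar T[4] = {1.0,0.0,0.0,1.0}; // 2x2 identity: triggers the isTI fast path

     MatCreateKAIJ(A,2,2,S,T,&K);          // K = I \otimes S + A \otimes T
     MatCreateVecs(K,&x,&y);
     VecSet(x,1.0);
     MatMult(K,x,y);
     VecDestroy(&x);
     VecDestroy(&y);
     MatDestroy(&K);                       // drops the extra reference on A
   .ve
*/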
/*MC
   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the form

     [I \otimes S + A \otimes T],

   where
     S is a dense (p \times q) matrix,
     T is a dense (p \times q) matrix,
     A is an AIJ (n \times n) matrix,
     and I is the identity matrix.

   The resulting matrix is (np \times nq).

   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.

   Notes:
   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
   where x and b are column vectors containing the row-major representations of X and B.

   Level: advanced

.seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
M*/
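
/* For the multiple right-hand-side use described in the Notes above, with N right-hand
   sides one can take p = q = N, S = NULL, and an explicit N x N identity for T, so the
   KAIJ matrix applies A \otimes I to the row-major interleaving of X. A sketch (N is
   assumed to be a small constant here):

   .vb
     PetscScalar Tid[N*N];
     PetscInt    i,j;
     for (i=0; i<N; i++) for (j=0; j<N; j++) Tid[i+j*N] = (i == j) ? 1.0 : 0.0;
     MatCreateKAIJ(A,N,N,NULL,Tid,&K); // MatKAIJSetT() detects the identity
   .ve
*/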
PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
{
  Mat_MPIKAIJ *b;
  PetscMPIInt size;

  PetscNewLog(A,&b);
  A->data = (void*)b;

  PetscMemzero(A->ops,sizeof(struct _MatOps));

  A->ops->setup = MatSetUp_KAIJ;

  b->w = NULL;
  MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
  if (size == 1) {
    PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);
    A->ops->setup               = MatSetUp_KAIJ;
    A->ops->destroy             = MatDestroy_SeqKAIJ;
    A->ops->view                = MatView_KAIJ;
    A->ops->mult                = MatMult_SeqKAIJ;
    A->ops->multadd             = MatMultAdd_SeqKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
    A->ops->getrow              = MatGetRow_SeqKAIJ;
    A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
    A->ops->sor                 = MatSOR_SeqKAIJ;
  } else {
    PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);
    A->ops->setup               = MatSetUp_KAIJ;
    A->ops->destroy             = MatDestroy_MPIKAIJ;
    A->ops->view                = MatView_KAIJ;
    A->ops->mult                = MatMult_MPIKAIJ;
    A->ops->multadd             = MatMultAdd_MPIKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
    A->ops->getrow              = MatGetRow_MPIKAIJ;
    A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
    PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);
  }
  A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
  return(0);
}