Actual source code: kaij.c
2: /*
3: Defines the basic matrix operations for the KAIJ matrix storage format.
4: This format is used to evaluate matrices of the form:
6: [I \otimes S + A \otimes T]
8: where
9: S is a dense (p \times q) matrix
10: T is a dense (p \times q) matrix
11: A is an AIJ (n \times n) matrix
12: I is the identity matrix
14: The resulting matrix is (np \times nq)
16: We provide:
17: MatMult()
18: MatMultAdd()
19: MatInvertBlockDiagonal()
20: and
21: MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
23: This single directory handles both the sequential and parallel codes
24: */
26: #include <../src/mat/impls/kaij/kaij.h>
27: #include <../src/mat/utils/freespace.h>
28: #include <petsc/private/vecimpl.h>
30: /*@C
31: MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
33: Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
35: Input Parameter:
36: . A - the KAIJ matrix
38: Output Parameter:
39: . B - the AIJ matrix
41: Level: advanced
43: Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
45: .seealso: MatCreateKAIJ()
46: @*/
47: PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B)
48: {
49: PetscBool ismpikaij,isseqkaij;
51: PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);
52: PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);
53: if (ismpikaij) {
54: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
56: *B = b->A;
57: } else if (isseqkaij) {
58: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
60: *B = b->AIJ;
61: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
62: return 0;
63: }
65: /*@C
66: MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
68: Not Collective; the entire S is stored and returned independently on all processes.
70: Input Parameter:
71: . A - the KAIJ matrix
73: Output Parameters:
74: + m - the number of rows in S
75: . n - the number of columns in S
76: - S - the S matrix, in form of a scalar array in column-major format
78: Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
80: Level: advanced
82: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
83: @*/
84: PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
85: {
86: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
87: if (m) *m = b->p;
88: if (n) *n = b->q;
89: if (S) *S = b->S;
90: return 0;
91: }
93: /*@C
94: MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
96: Not Collective; the entire S is stored and returned independently on all processes.
98: Input Parameter:
99: . A - the KAIJ matrix
101: Output Parameters:
102: + m - the number of rows in S
103: . n - the number of columns in S
104: - S - the S matrix, in form of a scalar array in column-major format
106: Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
108: Level: advanced
110: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
111: @*/
112: PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
113: {
114: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
115: if (m) *m = b->p;
116: if (n) *n = b->q;
117: if (S) *S = b->S;
118: return 0;
119: }
121: /*@C
122: MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
124: Not collective
126: Input Parameter:
127: . A - the KAIJ matrix
129: Output Parameter:
130: . S - location of pointer to array obtained with MatKAIJGetS()
132: Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
133: If NULL is passed, it will not attempt to zero the array pointer.
135: Level: advanced
136: .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
137: @*/
138: PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
139: {
140: if (S) *S = NULL;
141: PetscObjectStateIncrease((PetscObject)A);
142: return 0;
143: }
145: /*@C
146: MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
148: Not collective
150: Input Parameter:
151: . A - the KAIJ matrix
153: Output Parameter:
154: . S - location of pointer to array obtained with MatKAIJGetS()
156: Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
157: If NULL is passed, it will not attempt to zero the array pointer.
159: Level: advanced
160: .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
161: @*/
162: PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
163: {
164: if (S) *S = NULL;
165: return 0;
166: }
168: /*@C
169: MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
171: Not Collective; the entire T is stored and returned independently on all processes
173: Input Parameter:
174: . A - the KAIJ matrix
176: Output Parameters:
177: + m - the number of rows in T
178: . n - the number of columns in T
179: - T - the T matrix, in form of a scalar array in column-major format
181: Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
183: Level: advanced
185: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
186: @*/
187: PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
188: {
189: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
190: if (m) *m = b->p;
191: if (n) *n = b->q;
192: if (T) *T = b->T;
193: return 0;
194: }
196: /*@C
197: MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
199: Not Collective; the entire T is stored and returned independently on all processes
201: Input Parameter:
202: . A - the KAIJ matrix
204: Output Parameters:
205: + m - the number of rows in T
206: . n - the number of columns in T
207: - T - the T matrix, in form of a scalar array in column-major format
209: Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
211: Level: advanced
213: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
214: @*/
215: PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
216: {
217: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
218: if (m) *m = b->p;
219: if (n) *n = b->q;
220: if (T) *T = b->T;
221: return 0;
222: }
224: /*@C
225: MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
227: Not collective
229: Input Parameter:
230: . A - the KAIJ matrix
232: Output Parameter:
233: . T - location of pointer to array obtained with MatKAIJGetS()
235: Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
236: If NULL is passed, it will not attempt to zero the array pointer.
238: Level: advanced
239: .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
240: @*/
241: PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
242: {
243: if (T) *T = NULL;
244: PetscObjectStateIncrease((PetscObject)A);
245: return 0;
246: }
248: /*@C
249: MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
251: Not collective
253: Input Parameter:
254: . A - the KAIJ matrix
256: Output Parameter:
257: . T - location of pointer to array obtained with MatKAIJGetS()
259: Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
260: If NULL is passed, it will not attempt to zero the array pointer.
262: Level: advanced
263: .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
264: @*/
265: PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
266: {
267: if (T) *T = NULL;
268: return 0;
269: }
271: /*@
272: MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
274: Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
276: Input Parameters:
277: + A - the KAIJ matrix
278: - B - the AIJ matrix
280: Notes:
281: This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
282: Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
284: Level: advanced
286: .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
287: @*/
288: PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
289: {
290: PetscMPIInt size;
291: PetscBool flg;
293: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
294: if (size == 1) {
295: PetscObjectTypeCompare((PetscObject)B,MATSEQAIJ,&flg);
297: Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
298: a->AIJ = B;
299: } else {
300: Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
301: a->A = B;
302: }
303: PetscObjectReference((PetscObject)B);
304: return 0;
305: }
307: /*@C
308: MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
310: Logically Collective; the entire S is stored independently on all processes.
312: Input Parameters:
313: + A - the KAIJ matrix
314: . p - the number of rows in S
315: . q - the number of columns in S
316: - S - the S matrix, in form of a scalar array in column-major format
318: Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
319: The S matrix is copied, so the user can destroy this array.
321: Level: Advanced
323: .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
324: @*/
325: PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
326: {
327: Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
329: PetscFree(a->S);
330: if (S) {
331: PetscMalloc1(p*q,&a->S);
332: PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));
333: } else a->S = NULL;
335: a->p = p;
336: a->q = q;
337: return 0;
338: }
340: /*@C
341: MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
343: Logically Collective.
345: Input Parameter:
346: . A - the KAIJ matrix
348: Output Parameter:
349: . identity - the Boolean value
351: Level: Advanced
353: .seealso: MatKAIJGetS(), MatKAIJGetT()
354: @*/
355: PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
356: {
357: Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
358: PetscInt i,j;
360: if (a->p != a->q) {
361: *identity = PETSC_FALSE;
362: return 0;
363: } else *identity = PETSC_TRUE;
364: if (!a->isTI || a->S) {
365: for (i=0; i<a->p && *identity; i++) {
366: for (j=0; j<a->p && *identity; j++) {
367: if (i != j) {
368: if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
369: if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
370: } else {
371: if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
372: if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
373: }
374: }
375: }
376: }
377: return 0;
378: }
380: /*@C
381: MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
383: Logically Collective; the entire T is stored independently on all processes.
385: Input Parameters:
386: + A - the KAIJ matrix
387: . p - the number of rows in S
388: . q - the number of columns in S
389: - T - the T matrix, in form of a scalar array in column-major format
391: Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
392: The T matrix is copied, so the user can destroy this array.
394: Level: Advanced
396: .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
397: @*/
398: PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
399: {
400: PetscInt i,j;
401: Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
402: PetscBool isTI = PETSC_FALSE;
404: /* check if T is an identity matrix */
405: if (T && (p == q)) {
406: isTI = PETSC_TRUE;
407: for (i=0; i<p; i++) {
408: for (j=0; j<q; j++) {
409: if (i == j) {
410: /* diagonal term must be 1 */
411: if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
412: } else {
413: /* off-diagonal term must be 0 */
414: if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
415: }
416: }
417: }
418: }
419: a->isTI = isTI;
421: PetscFree(a->T);
422: if (T && (!isTI)) {
423: PetscMalloc1(p*q,&a->T);
424: PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));
425: } else a->T = NULL;
427: a->p = p;
428: a->q = q;
429: return 0;
430: }
432: PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
433: {
434: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
436: MatDestroy(&b->AIJ);
437: PetscFree(b->S);
438: PetscFree(b->T);
439: PetscFree(b->ibdiag);
440: PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);
441: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",NULL);
442: PetscFree(A->data);
443: return 0;
444: }
446: PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
447: {
448: Mat_MPIKAIJ *a;
449: Mat_MPIAIJ *mpiaij;
450: PetscScalar *T;
451: PetscInt i,j;
452: PetscObjectState state;
454: a = (Mat_MPIKAIJ*)A->data;
455: mpiaij = (Mat_MPIAIJ*)a->A->data;
457: PetscObjectStateGet((PetscObject)a->A,&state);
458: if (state == a->state) {
459: /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
460: return 0;
461: } else {
462: MatDestroy(&a->AIJ);
463: MatDestroy(&a->OAIJ);
464: if (a->isTI) {
465: /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
466: * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
467: * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
468: * to pass in. */
469: PetscMalloc1(a->p*a->q,&T);
470: for (i=0; i<a->p; i++) {
471: for (j=0; j<a->q; j++) {
472: if (i==j) T[i+j*a->p] = 1.0;
473: else T[i+j*a->p] = 0.0;
474: }
475: }
476: } else T = a->T;
477: MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);
478: MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);
479: if (a->isTI) {
480: PetscFree(T);
481: }
482: a->state = state;
483: }
485: return 0;
486: }
488: PetscErrorCode MatSetUp_KAIJ(Mat A)
489: {
490: PetscInt n;
491: PetscMPIInt size;
492: Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data;
494: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
495: if (size == 1) {
496: MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);
497: PetscLayoutSetBlockSize(A->rmap,seqkaij->p);
498: PetscLayoutSetBlockSize(A->cmap,seqkaij->q);
499: PetscLayoutSetUp(A->rmap);
500: PetscLayoutSetUp(A->cmap);
501: } else {
502: Mat_MPIKAIJ *a;
503: Mat_MPIAIJ *mpiaij;
504: IS from,to;
505: Vec gvec;
507: a = (Mat_MPIKAIJ*)A->data;
508: mpiaij = (Mat_MPIAIJ*)a->A->data;
509: MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);
510: PetscLayoutSetBlockSize(A->rmap,seqkaij->p);
511: PetscLayoutSetBlockSize(A->cmap,seqkaij->q);
512: PetscLayoutSetUp(A->rmap);
513: PetscLayoutSetUp(A->cmap);
515: MatKAIJ_build_AIJ_OAIJ(A);
517: VecGetSize(mpiaij->lvec,&n);
518: VecCreate(PETSC_COMM_SELF,&a->w);
519: VecSetSizes(a->w,n*a->q,n*a->q);
520: VecSetBlockSize(a->w,a->q);
521: VecSetType(a->w,VECSEQ);
523: /* create two temporary Index sets for build scatter gather */
524: ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
525: ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);
527: /* create temporary global vector to generate scatter context */
528: VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);
530: /* generate the scatter context */
531: VecScatterCreate(gvec,from,a->w,to,&a->ctx);
533: ISDestroy(&from);
534: ISDestroy(&to);
535: VecDestroy(&gvec);
536: }
538: A->assembled = PETSC_TRUE;
539: return 0;
540: }
542: PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
543: {
544: PetscViewerFormat format;
545: Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
546: Mat B;
547: PetscInt i;
548: PetscBool ismpikaij;
550: PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);
551: PetscViewerGetFormat(viewer,&format);
552: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
553: PetscViewerASCIIPrintf(viewer,"S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n",a->p,a->q);
555: /* Print appropriate details for S. */
556: if (!a->S) {
557: PetscViewerASCIIPrintf(viewer,"S is NULL\n");
558: } else if (format == PETSC_VIEWER_ASCII_IMPL) {
559: PetscViewerASCIIPrintf(viewer,"Entries of S are ");
560: for (i=0; i<(a->p * a->q); i++) {
561: #if defined(PETSC_USE_COMPLEX)
562: PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));
563: #else
564: PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));
565: #endif
566: }
567: PetscViewerASCIIPrintf(viewer,"\n");
568: }
570: /* Print appropriate details for T. */
571: if (a->isTI) {
572: PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");
573: } else if (!a->T) {
574: PetscViewerASCIIPrintf(viewer,"T is NULL\n");
575: } else if (format == PETSC_VIEWER_ASCII_IMPL) {
576: PetscViewerASCIIPrintf(viewer,"Entries of T are ");
577: for (i=0; i<(a->p * a->q); i++) {
578: #if defined(PETSC_USE_COMPLEX)
579: PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));
580: #else
581: PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));
582: #endif
583: }
584: PetscViewerASCIIPrintf(viewer,"\n");
585: }
587: /* Now print details for the AIJ matrix, using the AIJ viewer. */
588: PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");
589: if (ismpikaij) {
590: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
591: MatView(b->A,viewer);
592: } else {
593: MatView(a->AIJ,viewer);
594: }
596: } else {
597: /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
598: MatConvert(A,MATAIJ,MAT_INITIAL_MATRIX,&B);
599: MatView(B,viewer);
600: MatDestroy(&B);
601: }
602: return 0;
603: }
605: PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
606: {
607: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
609: MatDestroy(&b->AIJ);
610: MatDestroy(&b->OAIJ);
611: MatDestroy(&b->A);
612: VecScatterDestroy(&b->ctx);
613: VecDestroy(&b->w);
614: PetscFree(b->S);
615: PetscFree(b->T);
616: PetscFree(b->ibdiag);
617: PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",NULL);
618: PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",NULL);
619: PetscFree(A->data);
620: return 0;
621: }
623: /* --------------------------------------------------------------------------------------*/
625: /* zz = yy + Axx */
626: PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
627: {
628: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
629: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
630: const PetscScalar *s = b->S, *t = b->T;
631: const PetscScalar *x,*v,*bx;
632: PetscScalar *y,*sums;
633: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
634: PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k;
636: if (!yy) {
637: VecSet(zz,0.0);
638: } else {
639: VecCopy(yy,zz);
640: }
641: if ((!s) && (!t) && (!b->isTI)) return 0;
643: VecGetArrayRead(xx,&x);
644: VecGetArray(zz,&y);
645: idx = a->j;
646: v = a->a;
647: ii = a->i;
649: if (b->isTI) {
650: for (i=0; i<m; i++) {
651: jrow = ii[i];
652: n = ii[i+1] - jrow;
653: sums = y + p*i;
654: for (j=0; j<n; j++) {
655: for (k=0; k<p; k++) {
656: sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
657: }
658: }
659: }
660: PetscLogFlops(3.0*(a->nz)*p);
661: } else if (t) {
662: for (i=0; i<m; i++) {
663: jrow = ii[i];
664: n = ii[i+1] - jrow;
665: sums = y + p*i;
666: for (j=0; j<n; j++) {
667: for (k=0; k<p; k++) {
668: for (l=0; l<q; l++) {
669: sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
670: }
671: }
672: }
673: }
674: /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
675: * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
676: * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
677: * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
678: PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz);
679: }
680: if (s) {
681: for (i=0; i<m; i++) {
682: sums = y + p*i;
683: bx = x + q*i;
684: if (i < b->AIJ->cmap->n) {
685: for (j=0; j<q; j++) {
686: for (k=0; k<p; k++) {
687: sums[k] += s[k+j*p]*bx[j];
688: }
689: }
690: }
691: }
692: PetscLogFlops(2.0*m*p*q);
693: }
695: VecRestoreArrayRead(xx,&x);
696: VecRestoreArray(zz,&y);
697: return 0;
698: }
700: PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
701: {
702: MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);
703: return 0;
704: }
706: #include <petsc/private/kernels/blockinvert.h>
708: PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
709: {
710: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
711: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
712: const PetscScalar *S = b->S;
713: const PetscScalar *T = b->T;
714: const PetscScalar *v = a->a;
715: const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
716: PetscInt i,j,*v_pivots,dof,dof2;
717: PetscScalar *diag,aval,*v_work;
722: dof = p;
723: dof2 = dof*dof;
725: if (b->ibdiagvalid) {
726: if (values) *values = b->ibdiag;
727: return 0;
728: }
729: if (!b->ibdiag) {
730: PetscMalloc1(dof2*m,&b->ibdiag);
731: PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));
732: }
733: if (values) *values = b->ibdiag;
734: diag = b->ibdiag;
736: PetscMalloc2(dof,&v_work,dof,&v_pivots);
737: for (i=0; i<m; i++) {
738: if (S) {
739: PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));
740: } else {
741: PetscMemzero(diag,dof2*sizeof(PetscScalar));
742: }
743: if (b->isTI) {
744: aval = 0;
745: for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
746: for (j=0; j<dof; j++) diag[j+dof*j] += aval;
747: } else if (T) {
748: aval = 0;
749: for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
750: for (j=0; j<dof2; j++) diag[j] += aval*T[j];
751: }
752: PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);
753: diag += dof2;
754: }
755: PetscFree2(v_work,v_pivots);
757: b->ibdiagvalid = PETSC_TRUE;
758: return 0;
759: }
761: static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
762: {
763: Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
765: *B = kaij->AIJ;
766: return 0;
767: }
769: static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
770: {
771: Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
772: Mat AIJ,OAIJ,B;
773: PetscInt *d_nnz,*o_nnz = NULL,nz,i,j,m,d;
774: const PetscInt p = a->p,q = a->q;
775: PetscBool ismpikaij,missing;
777: if (reuse != MAT_REUSE_MATRIX) {
778: PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);
779: if (ismpikaij) {
780: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
781: AIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
782: OAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
783: } else {
784: AIJ = a->AIJ;
785: OAIJ = NULL;
786: }
787: MatCreate(PetscObjectComm((PetscObject)A),&B);
788: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
789: MatSetType(B,MATAIJ);
790: MatGetSize(AIJ,&m,NULL);
791: MatMissingDiagonal(AIJ,&missing,&d); /* assumption that all successive rows will have a missing diagonal */
792: if (!missing || !a->S) d = m;
793: PetscMalloc1(m*p,&d_nnz);
794: for (i = 0; i < m; ++i) {
795: MatGetRow_SeqAIJ(AIJ,i,&nz,NULL,NULL);
796: for (j = 0; j < p; ++j) d_nnz[i*p + j] = nz*q + (i >= d)*q;
797: MatRestoreRow_SeqAIJ(AIJ,i,&nz,NULL,NULL);
798: }
799: if (OAIJ) {
800: PetscMalloc1(m*p,&o_nnz);
801: for (i = 0; i < m; ++i) {
802: MatGetRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL);
803: for (j = 0; j < p; ++j) o_nnz[i*p + j] = nz*q;
804: MatRestoreRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL);
805: }
806: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
807: } else {
808: MatSeqAIJSetPreallocation(B,0,d_nnz);
809: }
810: PetscFree(d_nnz);
811: PetscFree(o_nnz);
812: } else B = *newmat;
813: MatConvert_Basic(A,newtype,MAT_REUSE_MATRIX,&B);
814: if (reuse == MAT_INPLACE_MATRIX) {
815: MatHeaderReplace(A,&B);
816: } else *newmat = B;
817: return 0;
818: }
820: PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
821: {
822: PetscErrorCode ierr;
823: Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data;
824: Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data;
825: const PetscScalar *aa = a->a, *T = kaij->T, *v;
826: const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
827: const PetscScalar *b, *xb, *idiag;
828: PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt;
829: PetscInt i, j, k, i2, bs, bs2, nz;
831: its = its*lits;
837: else {bs = p; bs2 = bs*bs; }
839: if (!m) return 0;
841: if (!kaij->ibdiagvalid) MatInvertBlockDiagonal_SeqKAIJ(A,NULL);
842: idiag = kaij->ibdiag;
843: diag = a->diag;
845: if (!kaij->sor.setup) {
846: PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);
847: kaij->sor.setup = PETSC_TRUE;
848: }
849: y = kaij->sor.y;
850: w = kaij->sor.w;
851: work = kaij->sor.work;
852: t = kaij->sor.t;
853: arr = kaij->sor.arr;
855: VecGetArray(xx,&x); ierr;
856: VecGetArrayRead(bb,&b);
858: if (flag & SOR_ZERO_INITIAL_GUESS) {
859: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
860: PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */
861: PetscMemcpy(t,b,bs*sizeof(PetscScalar));
862: i2 = bs;
863: idiag += bs2;
864: for (i=1; i<m; i++) {
865: v = aa + ai[i];
866: vi = aj + ai[i];
867: nz = diag[i] - ai[i];
869: if (T) { /* b - T (Arow * x) */
870: PetscMemzero(w,bs*sizeof(PetscScalar));
871: for (j=0; j<nz; j++) {
872: for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
873: }
874: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
875: for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
876: } else if (kaij->isTI) {
877: PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));
878: for (j=0; j<nz; j++) {
879: for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
880: }
881: } else {
882: PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));
883: }
885: PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
886: for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
888: idiag += bs2;
889: i2 += bs;
890: }
891: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
892: PetscLogFlops(1.0*bs2*a->nz);
893: xb = t;
894: } else xb = b;
895: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
896: idiag = kaij->ibdiag+bs2*(m-1);
897: i2 = bs * (m-1);
898: PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
899: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
900: i2 -= bs;
901: idiag -= bs2;
902: for (i=m-2; i>=0; i--) {
903: v = aa + diag[i] + 1 ;
904: vi = aj + diag[i] + 1;
905: nz = ai[i+1] - diag[i] - 1;
907: if (T) { /* FIXME: This branch untested */
908: PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
909: /* copy all rows of x that are needed into contiguous space */
910: workt = work;
911: for (j=0; j<nz; j++) {
912: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
913: workt += bs;
914: }
915: arrt = arr;
916: for (j=0; j<nz; j++) {
917: PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
918: for (k=0; k<bs2; k++) arrt[k] *= v[j];
919: arrt += bs2;
920: }
921: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
922: } else if (kaij->isTI) {
923: PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));
924: for (j=0; j<nz; j++) {
925: for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
926: }
927: }
929: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
930: for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
932: idiag -= bs2;
933: i2 -= bs;
934: }
935: PetscLogFlops(1.0*bs2*(a->nz));
936: }
937: its--;
938: }
939: while (its--) { /* FIXME: This branch not updated */
940: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
941: i2 = 0;
942: idiag = kaij->ibdiag;
943: for (i=0; i<m; i++) {
944: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
946: v = aa + ai[i];
947: vi = aj + ai[i];
948: nz = diag[i] - ai[i];
949: workt = work;
950: for (j=0; j<nz; j++) {
951: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
952: workt += bs;
953: }
954: arrt = arr;
955: if (T) {
956: for (j=0; j<nz; j++) {
957: PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
958: for (k=0; k<bs2; k++) arrt[k] *= v[j];
959: arrt += bs2;
960: }
961: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
962: } else if (kaij->isTI) {
963: for (j=0; j<nz; j++) {
964: PetscMemzero(arrt,bs2*sizeof(PetscScalar));
965: for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
966: arrt += bs2;
967: }
968: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
969: }
970: PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));
972: v = aa + diag[i] + 1;
973: vi = aj + diag[i] + 1;
974: nz = ai[i+1] - diag[i] - 1;
975: workt = work;
976: for (j=0; j<nz; j++) {
977: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
978: workt += bs;
979: }
980: arrt = arr;
981: if (T) {
982: for (j=0; j<nz; j++) {
983: PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
984: for (k=0; k<bs2; k++) arrt[k] *= v[j];
985: arrt += bs2;
986: }
987: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
988: } else if (kaij->isTI) {
989: for (j=0; j<nz; j++) {
990: PetscMemzero(arrt,bs2*sizeof(PetscScalar));
991: for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
992: arrt += bs2;
993: }
994: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
995: }
997: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
998: for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1000: idiag += bs2;
1001: i2 += bs;
1002: }
1003: xb = t;
1004: }
1005: else xb = b;
1006: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1007: idiag = kaij->ibdiag+bs2*(m-1);
1008: i2 = bs * (m-1);
1009: if (xb == b) {
1010: for (i=m-1; i>=0; i--) {
1011: PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
1013: v = aa + ai[i];
1014: vi = aj + ai[i];
1015: nz = diag[i] - ai[i];
1016: workt = work;
1017: for (j=0; j<nz; j++) {
1018: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1019: workt += bs;
1020: }
1021: arrt = arr;
1022: if (T) {
1023: for (j=0; j<nz; j++) {
1024: PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
1025: for (k=0; k<bs2; k++) arrt[k] *= v[j];
1026: arrt += bs2;
1027: }
1028: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1029: } else if (kaij->isTI) {
1030: for (j=0; j<nz; j++) {
1031: PetscMemzero(arrt,bs2*sizeof(PetscScalar));
1032: for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1033: arrt += bs2;
1034: }
1035: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1036: }
1038: v = aa + diag[i] + 1;
1039: vi = aj + diag[i] + 1;
1040: nz = ai[i+1] - diag[i] - 1;
1041: workt = work;
1042: for (j=0; j<nz; j++) {
1043: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1044: workt += bs;
1045: }
1046: arrt = arr;
1047: if (T) {
1048: for (j=0; j<nz; j++) {
1049: PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
1050: for (k=0; k<bs2; k++) arrt[k] *= v[j];
1051: arrt += bs2;
1052: }
1053: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1054: } else if (kaij->isTI) {
1055: for (j=0; j<nz; j++) {
1056: PetscMemzero(arrt,bs2*sizeof(PetscScalar));
1057: for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1058: arrt += bs2;
1059: }
1060: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1061: }
1063: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1064: for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1065: }
1066: } else {
1067: for (i=m-1; i>=0; i--) {
1068: PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
1069: v = aa + diag[i] + 1;
1070: vi = aj + diag[i] + 1;
1071: nz = ai[i+1] - diag[i] - 1;
1072: workt = work;
1073: for (j=0; j<nz; j++) {
1074: PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1075: workt += bs;
1076: }
1077: arrt = arr;
1078: if (T) {
1079: for (j=0; j<nz; j++) {
1080: PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
1081: for (k=0; k<bs2; k++) arrt[k] *= v[j];
1082: arrt += bs2;
1083: }
1084: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1085: } else if (kaij->isTI) {
1086: for (j=0; j<nz; j++) {
1087: PetscMemzero(arrt,bs2*sizeof(PetscScalar));
1088: for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1089: arrt += bs2;
1090: }
1091: PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1092: }
1093: PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1094: for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1095: }
1096: }
1097: PetscLogFlops(1.0*bs2*(a->nz));
1098: }
1099: }
1101: VecRestoreArray(xx,&x); ierr;
1102: VecRestoreArrayRead(bb,&b);
1103: return 0;
1104: }
1106: /*===================================================================================*/
1108: PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1109: {
1110: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
1112: if (!yy) {
1113: VecSet(zz,0.0);
1114: } else {
1115: VecCopy(yy,zz);
1116: }
1117: MatKAIJ_build_AIJ_OAIJ(A); /* Ensure b->AIJ and b->OAIJ are up to date. */
1118: /* start the scatter */
1119: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
1120: (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);
1121: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
1122: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
1123: return 0;
1124: }
1126: PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1127: {
1128: MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);
1129: return 0;
1130: }
1132: PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1133: {
1134: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
1136: MatKAIJ_build_AIJ_OAIJ(A); /* Ensure b->AIJ is up to date. */
1137: (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);
1138: return 0;
1139: }
1141: /* ----------------------------------------------------------------*/
1143: PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1144: {
1145: Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data;
1146: PetscErrorCode diag = PETSC_FALSE;
1147: PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1148: PetscScalar *vaij,*v,*S=b->S,*T=b->T;
1151: b->getrowactive = PETSC_TRUE;
1154: if ((!S) && (!T) && (!b->isTI)) {
1155: if (ncols) *ncols = 0;
1156: if (cols) *cols = NULL;
1157: if (values) *values = NULL;
1158: return 0;
1159: }
1161: if (T || b->isTI) {
1162: MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);
1163: c = nzaij;
1164: for (i=0; i<nzaij; i++) {
1165: /* check if this row contains a diagonal entry */
1166: if (colsaij[i] == r) {
1167: diag = PETSC_TRUE;
1168: c = i;
1169: }
1170: }
1171: } else nzaij = c = 0;
1173: /* calculate size of row */
1174: nz = 0;
1175: if (S) nz += q;
1176: if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1178: if (cols || values) {
1179: PetscMalloc2(nz,&idx,nz,&v);
1180: for (i=0; i<q; i++) {
1181: /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1182: v[i] = 0.0;
1183: }
1184: if (b->isTI) {
1185: for (i=0; i<nzaij; i++) {
1186: for (j=0; j<q; j++) {
1187: idx[i*q+j] = colsaij[i]*q+j;
1188: v[i*q+j] = (j==s ? vaij[i] : 0);
1189: }
1190: }
1191: } else if (T) {
1192: for (i=0; i<nzaij; i++) {
1193: for (j=0; j<q; j++) {
1194: idx[i*q+j] = colsaij[i]*q+j;
1195: v[i*q+j] = vaij[i]*T[s+j*p];
1196: }
1197: }
1198: }
1199: if (S) {
1200: for (j=0; j<q; j++) {
1201: idx[c*q+j] = r*q+j;
1202: v[c*q+j] += S[s+j*p];
1203: }
1204: }
1205: }
1207: if (ncols) *ncols = nz;
1208: if (cols) *cols = idx;
1209: if (values) *values = v;
1210: return 0;
1211: }
1213: PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1214: {
1215: if (nz) *nz = 0;
1216: PetscFree2(*idx,*v);
1217: ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1218: return 0;
1219: }
1221: PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1222: {
1223: Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data;
1224: Mat AIJ = b->A;
1225: PetscBool diag = PETSC_FALSE;
1226: Mat MatAIJ,MatOAIJ;
1227: const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1228: PetscInt nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1229: PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T;
1231: MatKAIJ_build_AIJ_OAIJ(A); /* Ensure b->AIJ and b->OAIJ are up to date. */
1232: MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1233: MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1235: b->getrowactive = PETSC_TRUE;
1237: lrow = row - rstart;
1239: if ((!S) && (!T) && (!b->isTI)) {
1240: if (ncols) *ncols = 0;
1241: if (cols) *cols = NULL;
1242: if (values) *values = NULL;
1243: return 0;
1244: }
1246: r = lrow/p;
1247: s = lrow%p;
1249: if (T || b->isTI) {
1250: MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
1251: MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);
1252: MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);
1254: c = ncolsaij + ncolsoaij;
1255: for (i=0; i<ncolsaij; i++) {
1256: /* check if this row contains a diagonal entry */
1257: if (colsaij[i] == r) {
1258: diag = PETSC_TRUE;
1259: c = i;
1260: }
1261: }
1262: } else c = 0;
1264: /* calculate size of row */
1265: nz = 0;
1266: if (S) nz += q;
1267: if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1269: if (cols || values) {
1270: PetscMalloc2(nz,&idx,nz,&v);
1271: for (i=0; i<q; i++) {
1272: /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1273: v[i] = 0.0;
1274: }
1275: if (b->isTI) {
1276: for (i=0; i<ncolsaij; i++) {
1277: for (j=0; j<q; j++) {
1278: idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1279: v[i*q+j] = (j==s ? vals[i] : 0.0);
1280: }
1281: }
1282: for (i=0; i<ncolsoaij; i++) {
1283: for (j=0; j<q; j++) {
1284: idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1285: v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0);
1286: }
1287: }
1288: } else if (T) {
1289: for (i=0; i<ncolsaij; i++) {
1290: for (j=0; j<q; j++) {
1291: idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1292: v[i*q+j] = vals[i]*T[s+j*p];
1293: }
1294: }
1295: for (i=0; i<ncolsoaij; i++) {
1296: for (j=0; j<q; j++) {
1297: idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1298: v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p];
1299: }
1300: }
1301: }
1302: if (S) {
1303: for (j=0; j<q; j++) {
1304: idx[c*q+j] = (r+rstart/p)*q+j;
1305: v[c*q+j] += S[s+j*p];
1306: }
1307: }
1308: }
1310: if (ncols) *ncols = nz;
1311: if (cols) *cols = idx;
1312: if (values) *values = v;
1313: return 0;
1314: }
1316: PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1317: {
1318: PetscFree2(*idx,*v);
1319: ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1320: return 0;
1321: }
1323: PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1324: {
1325: Mat A;
1327: MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
1328: MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
1329: MatDestroy(&A);
1330: return 0;
1331: }
1333: /* ---------------------------------------------------------------------------------- */
1334: /*@C
1335: MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1337: [I \otimes S + A \otimes T]
1339: where
1340: S is a dense (p \times q) matrix
1341: T is a dense (p \times q) matrix
1342: A is an AIJ (n \times n) matrix
1343: I is the identity matrix
1344: The resulting matrix is (np \times nq)
1346: S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1348: Collective
1350: Input Parameters:
1351: + A - the AIJ matrix
1352: . p - number of rows in S and T
1353: . q - number of columns in S and T
1354: . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1355: - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1357: Output Parameter:
1358: . kaij - the new KAIJ matrix
1360: Notes:
1361: This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1362: Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1364: Developer Notes:
1365: In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1366: of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
1367: rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
1368: routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1370: Level: advanced
1372: .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1373: @*/
1374: PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1375: {
1376: MatCreate(PetscObjectComm((PetscObject)A),kaij);
1377: MatSetType(*kaij,MATKAIJ);
1378: MatKAIJSetAIJ(*kaij,A);
1379: MatKAIJSetS(*kaij,p,q,S);
1380: MatKAIJSetT(*kaij,p,q,T);
1381: MatSetUp(*kaij);
1382: return 0;
1383: }
1385: /*MC
1386: MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1387: [I \otimes S + A \otimes T],
1388: where
1389: S is a dense (p \times q) matrix,
1390: T is a dense (p \times q) matrix,
1391: A is an AIJ (n \times n) matrix,
1392: and I is the identity matrix.
1393: The resulting matrix is (np \times nq).
1395: S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1397: Notes:
1398: A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1399: where x and b are column vectors containing the row-major representations of X and B.
1401: Level: advanced
1403: .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1404: M*/
1406: PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1407: {
1408: Mat_MPIKAIJ *b;
1409: PetscMPIInt size;
1411: PetscNewLog(A,&b);
1412: A->data = (void*)b;
1414: PetscMemzero(A->ops,sizeof(struct _MatOps));
1416: b->w = NULL;
1417: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1418: if (size == 1) {
1419: PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);
1420: A->ops->destroy = MatDestroy_SeqKAIJ;
1421: A->ops->mult = MatMult_SeqKAIJ;
1422: A->ops->multadd = MatMultAdd_SeqKAIJ;
1423: A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1424: A->ops->getrow = MatGetRow_SeqKAIJ;
1425: A->ops->restorerow = MatRestoreRow_SeqKAIJ;
1426: A->ops->sor = MatSOR_SeqKAIJ;
1427: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",MatConvert_KAIJ_AIJ);
1428: } else {
1429: PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);
1430: A->ops->destroy = MatDestroy_MPIKAIJ;
1431: A->ops->mult = MatMult_MPIKAIJ;
1432: A->ops->multadd = MatMultAdd_MPIKAIJ;
1433: A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1434: A->ops->getrow = MatGetRow_MPIKAIJ;
1435: A->ops->restorerow = MatRestoreRow_MPIKAIJ;
1436: PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);
1437: PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",MatConvert_KAIJ_AIJ);
1438: }
1439: A->ops->setup = MatSetUp_KAIJ;
1440: A->ops->view = MatView_KAIJ;
1441: A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1442: return 0;
1443: }