Actual source code: math2opusutils.cu

  1: #include <petsc/private/matimpl.h>
  2: #include <petsc/private/vecimpl.h>
  3: #include <petscsf.h>
  4: #if defined(PETSC_HAVE_CUDA)
  5:   #include <thrust/for_each.h>
  6:   #include <thrust/device_vector.h>
  7:   #include <thrust/execution_policy.h>
  8: #endif

 10: PETSC_INTERN PetscErrorCode MatDenseGetH2OpusStridedSF(Mat A, PetscSF h2sf, PetscSF *osf)
 11: {
 12:   PetscSF asf;

 14:   PetscFunctionBegin;
 17:   PetscAssertPointer(osf, 3);
 18:   PetscCall(PetscObjectQuery((PetscObject)A, "_math2opus_stridedsf", (PetscObject *)&asf));
 19:   if (!asf) {
 20:     PetscInt lda;

 22:     PetscCall(MatDenseGetLDA(A, &lda));
 23:     PetscCall(PetscSFCreateStridedSF(h2sf, A->cmap->N, lda, PETSC_DECIDE, &asf));
 24:     PetscCall(PetscObjectCompose((PetscObject)A, "_math2opus_stridedsf", (PetscObject)asf));
 25:     PetscCall(PetscObjectDereference((PetscObject)asf));
 26:   }
 27:   *osf = asf;
 28:   PetscFunctionReturn(PETSC_SUCCESS);
 29: }

 31: #if defined(PETSC_HAVE_CUDA)
 32: struct StandardBasis_Functor {
 33:   PetscScalar *v;
 34:   PetscInt     j;

 36:   StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) { }
 37:   __host__ __device__ void operator()(PetscInt i) { v[i] = (i == j ? 1 : 0); }
 38: };
 39: #endif

 41: PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i)
 42: {
 43: #if defined(PETSC_HAVE_CUDA)
 44:   PetscBool iscuda;
 45: #endif
 46:   PetscInt st, en;

 48:   PetscFunctionBegin;
 49:   PetscCall(VecGetOwnershipRange(x, &st, &en));
 50: #if defined(PETSC_HAVE_CUDA)
 51:   PetscCall(PetscObjectTypeCompareAny((PetscObject)x, &iscuda, VECSEQCUDA, VECMPICUDA, ""));
 52:   iscuda = (PetscBool)(iscuda && !x->boundtocpu);
 53:   if (iscuda) {
 54:     PetscScalar *ax;
 55:     PetscCall(VecCUDAGetArrayWrite(x, &ax));
 56:     StandardBasis_Functor delta(ax, i - st);
 57:     thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(en - st), delta);
 58:     PetscCall(VecCUDARestoreArrayWrite(x, &ax));
 59:   } else
 60: #endif
 61:   {
 62:     PetscCall(VecSet(x, 0.));
 63:     if (st <= i && i < en) PetscCall(VecSetValue(x, i, 1.0, INSERT_VALUES));
 64:     PetscCall(VecAssemblyBegin(x));
 65:     PetscCall(VecAssemblyEnd(x));
 66:   }
 67:   PetscFunctionReturn(PETSC_SUCCESS);
 68: }

 70: /* these are approximate norms */
 71: /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham
 72:    NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */
 73: PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal *n)
 74: {
 75:   Vec         x, y, w, z;
 76:   PetscReal   normz, adot;
 77:   PetscScalar dot;
 78:   PetscInt    i, j, N, jold = -1;
 79:   PetscBool   boundtocpu = PETSC_TRUE;

 81:   PetscFunctionBegin;
 82: #if defined(PETSC_HAVE_DEVICE)
 83:   boundtocpu = A->boundtocpu;
 84: #endif
 85:   switch (normtype) {
 86:   case NORM_INFINITY:
 87:   case NORM_1:
 88:     if (normsamples < 0) normsamples = 10; /* pure guess */
 89:     if (normtype == NORM_INFINITY) {
 90:       Mat B;
 91:       PetscCall(MatCreateTranspose(A, &B));
 92:       A = B;
 93:     } else {
 94:       PetscCall(PetscObjectReference((PetscObject)A));
 95:     }
 96:     PetscCall(MatCreateVecs(A, &x, &y));
 97:     PetscCall(MatCreateVecs(A, &z, &w));
 98:     PetscCall(VecBindToCPU(x, boundtocpu));
 99:     PetscCall(VecBindToCPU(y, boundtocpu));
100:     PetscCall(VecBindToCPU(z, boundtocpu));
101:     PetscCall(VecBindToCPU(w, boundtocpu));
102:     PetscCall(VecGetSize(x, &N));
103:     PetscCall(VecSet(x, 1. / N));
104:     *n = 0.0;
105:     for (i = 0; i < normsamples; i++) {
106:       PetscCall(MatMult(A, x, y));
107:       PetscCall(VecPointwiseSign(w, y, VEC_SIGN_ZERO_TO_SIGNED_UNIT));
108:       PetscCall(MatMultTranspose(A, w, z));
109:       PetscCall(VecNorm(z, NORM_INFINITY, &normz));
110:       PetscCall(VecDot(x, z, &dot));
111:       adot = PetscAbsScalar(dot);
112:       PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> (%g %g)\n", NormTypes[normtype], i, (double)normz, (double)adot));
113:       if (normz <= adot && i > 0) {
114:         PetscCall(VecNorm(y, NORM_1, n));
115:         break;
116:       }
117:       PetscCall(VecMax(z, &j, &normz));
118:       if (j == jold) {
119:         PetscCall(VecNorm(y, NORM_1, n));
120:         PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n", NormTypes[normtype], i));
121:         break;
122:       }
123:       jold = j;
124:       PetscCall(VecSetDelta(x, j));
125:     }
126:     PetscCall(MatDestroy(&A));
127:     PetscCall(VecDestroy(&x));
128:     PetscCall(VecDestroy(&w));
129:     PetscCall(VecDestroy(&y));
130:     PetscCall(VecDestroy(&z));
131:     break;
132:   case NORM_2:
133:     if (normsamples < 0) normsamples = 20; /* pure guess */
134:     PetscCall(MatCreateVecs(A, &x, &y));
135:     PetscCall(MatCreateVecs(A, &z, NULL));
136:     PetscCall(VecBindToCPU(x, boundtocpu));
137:     PetscCall(VecBindToCPU(y, boundtocpu));
138:     PetscCall(VecBindToCPU(z, boundtocpu));
139:     PetscCall(VecSetRandom(x, NULL));
140:     PetscCall(VecNormalize(x, NULL));
141:     *n = 0.0;
142:     for (i = 0; i < normsamples; i++) {
143:       PetscCall(MatMult(A, x, y));
144:       PetscCall(VecNormalize(y, n));
145:       PetscCall(MatMultTranspose(A, y, z));
146:       PetscCall(VecNorm(z, NORM_2, &normz));
147:       PetscCall(VecDot(x, z, &dot));
148:       adot = PetscAbsScalar(dot);
149:       PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> %g (%g %g)\n", NormTypes[normtype], i, (double)*n, (double)normz, (double)adot));
150:       if (normz <= adot) break;
151:       if (i < normsamples - 1) {
152:         Vec t;

154:         PetscCall(VecNormalize(z, NULL));
155:         t = x;
156:         x = z;
157:         z = t;
158:       }
159:     }
160:     PetscCall(VecDestroy(&x));
161:     PetscCall(VecDestroy(&y));
162:     PetscCall(VecDestroy(&z));
163:     break;
164:   default:
165:     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "%s norm not supported", NormTypes[normtype]);
166:   }
167:   PetscCall(PetscInfo(A, "%s norm %g computed in %" PetscInt_FMT " iterations\n", NormTypes[normtype], (double)*n, i));
168:   PetscFunctionReturn(PETSC_SUCCESS);
169: }