Actual source code: htool.cxx
1: #include <../src/mat/impls/htool/htool.hpp>
2: #include <petscblaslapack.h>
3: #include <set>
5: #define ALEN(a) (sizeof(a)/sizeof((a)[0]))
6: const char *const MatHtoolCompressorTypes[] = { "sympartialACA", "fullACA", "SVD" };
7: const char *const MatHtoolClusteringTypes[] = { "PCARegular", "PCAGeometric", "BoundingBox1Regular", "BoundingBox1Geometric" };
8: const char HtoolCitation[] = "@article{marchand2020two,\n"
9: " Author = {Marchand, Pierre and Claeys, Xavier and Jolivet, Pierre and Nataf, Fr\\'ed\\'eric and Tournier, Pierre-Henri},\n"
10: " Title = {Two-level preconditioning for $h$-version boundary element approximation of hypersingular operator with {GenEO}},\n"
11: " Year = {2020},\n"
12: " Publisher = {Elsevier},\n"
13: " Journal = {Numerische Mathematik},\n"
14: " Volume = {146},\n"
15: " Pages = {597--628},\n"
16: " Url = {https://github.com/htool-ddm/htool}\n"
17: "}\n";
18: static PetscBool HtoolCite = PETSC_FALSE;
20: static PetscErrorCode MatGetDiagonal_Htool(Mat A,Vec v)
21: {
22: Mat_Htool *a = (Mat_Htool*)A->data;
23: PetscScalar *x;
24: PetscBool flg;
26: MatHasCongruentLayouts(A,&flg);
28: VecGetArrayWrite(v,&x);
29: a->hmatrix->copy_local_diagonal(x);
30: VecRestoreArrayWrite(v,&x);
31: VecScale(v,a->s);
32: return 0;
33: }
35: static PetscErrorCode MatGetDiagonalBlock_Htool(Mat A,Mat *b)
36: {
37: Mat_Htool *a = (Mat_Htool*)A->data;
38: Mat B;
39: PetscScalar *ptr;
40: PetscBool flg;
42: MatHasCongruentLayouts(A,&flg);
44: PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B); /* same logic as in MatGetDiagonalBlock_MPIDense() */
45: if (!B) {
46: MatCreateDense(PETSC_COMM_SELF,A->rmap->n,A->rmap->n,A->rmap->n,A->rmap->n,NULL,&B);
47: MatDenseGetArrayWrite(B,&ptr);
48: a->hmatrix->copy_local_diagonal_block(ptr);
49: MatDenseRestoreArrayWrite(B,&ptr);
50: MatPropagateSymmetryOptions(A,B);
51: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
52: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
53: MatScale(B,a->s);
54: PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
55: *b = B;
56: MatDestroy(&B);
57: } else *b = B;
58: return 0;
59: }
61: static PetscErrorCode MatMult_Htool(Mat A,Vec x,Vec y)
62: {
63: Mat_Htool *a = (Mat_Htool*)A->data;
64: const PetscScalar *in;
65: PetscScalar *out;
67: VecGetArrayRead(x,&in);
68: VecGetArrayWrite(y,&out);
69: a->hmatrix->mvprod_local_to_local(in,out);
70: VecRestoreArrayRead(x,&in);
71: VecRestoreArrayWrite(y,&out);
72: VecScale(y,a->s);
73: return 0;
74: }
76: /* naive implementation of MatMultAdd() needed for FEM-BEM coupling via MATNEST */
77: static PetscErrorCode MatMultAdd_Htool(Mat A,Vec v1,Vec v2,Vec v3)
78: {
79: Mat_Htool *a = (Mat_Htool*)A->data;
80: Vec tmp;
81: const PetscScalar scale = a->s;
83: VecDuplicate(v2,&tmp);
84: VecCopy(v2,v3); /* no-op in MatMultAdd(bA->m[i][j],bx[j],by[i],by[i]) since VecCopy() checks for x == y */
85: a->s = 1.0; /* set s to 1.0 since VecAXPY() may be used to scale the MatMult() output Vec */
86: MatMult_Htool(A,v1,tmp);
87: VecAXPY(v3,scale,tmp);
88: VecDestroy(&tmp);
89: a->s = scale; /* set s back to its original value */
90: return 0;
91: }
93: static PetscErrorCode MatMultTranspose_Htool(Mat A,Vec x,Vec y)
94: {
95: Mat_Htool *a = (Mat_Htool*)A->data;
96: const PetscScalar *in;
97: PetscScalar *out;
99: VecGetArrayRead(x,&in);
100: VecGetArrayWrite(y,&out);
101: a->hmatrix->mvprod_transp_local_to_local(in,out);
102: VecRestoreArrayRead(x,&in);
103: VecRestoreArrayWrite(y,&out);
104: VecScale(y,a->s);
105: return 0;
106: }
108: static PetscErrorCode MatIncreaseOverlap_Htool(Mat A,PetscInt is_max,IS is[],PetscInt ov)
109: {
110: std::set<PetscInt> set;
111: const PetscInt *idx;
112: PetscInt *oidx,size,bs[2];
113: PetscMPIInt csize;
115: MatGetBlockSizes(A,bs,bs+1);
116: if (bs[0] != bs[1]) bs[0] = 1;
117: for (PetscInt i=0; i<is_max; ++i) {
118: /* basic implementation that adds indices by shifting an IS by -ov, -ov+1..., -1, 1..., ov-1, ov */
119: /* needed to avoid subdomain matrices to replicate A since it is dense */
120: MPI_Comm_size(PetscObjectComm((PetscObject)is[i]),&csize);
122: ISGetSize(is[i],&size);
123: ISGetIndices(is[i],&idx);
124: for (PetscInt j=0; j<size; ++j) {
125: set.insert(idx[j]);
126: for (PetscInt k=1; k<=ov; ++k) { /* for each layer of overlap */
127: if (idx[j] - k >= 0) set.insert(idx[j] - k); /* do not insert negative indices */
128: if (idx[j] + k < A->rmap->N && idx[j] + k < A->cmap->N) set.insert(idx[j] + k); /* do not insert indices greater than the dimension of A */
129: }
130: }
131: ISRestoreIndices(is[i],&idx);
132: ISDestroy(is+i);
133: if (bs[0] > 1) {
134: for (std::set<PetscInt>::iterator it=set.cbegin(); it!=set.cend(); it++) {
135: std::vector<PetscInt> block(bs[0]);
136: std::iota(block.begin(),block.end(),(*it/bs[0])*bs[0]);
137: set.insert(block.cbegin(),block.cend());
138: }
139: }
140: size = set.size(); /* size with overlap */
141: PetscMalloc1(size,&oidx);
142: for (const PetscInt j : set) *oidx++ = j;
143: oidx -= size;
144: ISCreateGeneral(PETSC_COMM_SELF,size,oidx,PETSC_OWN_POINTER,is+i);
145: }
146: return 0;
147: }
149: static PetscErrorCode MatCreateSubMatrices_Htool(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
150: {
151: Mat_Htool *a = (Mat_Htool*)A->data;
152: Mat D,B,BT;
153: const PetscScalar *copy;
154: PetscScalar *ptr;
155: const PetscInt *idxr,*idxc,*it;
156: PetscInt nrow,m,i;
157: PetscBool flg;
159: if (scall != MAT_REUSE_MATRIX) {
160: PetscCalloc1(n,submat);
161: }
162: for (i=0; i<n; ++i) {
163: ISGetLocalSize(irow[i],&nrow);
164: ISGetLocalSize(icol[i],&m);
165: ISGetIndices(irow[i],&idxr);
166: ISGetIndices(icol[i],&idxc);
167: if (scall != MAT_REUSE_MATRIX) {
168: MatCreateDense(PETSC_COMM_SELF,nrow,m,nrow,m,NULL,(*submat)+i);
169: }
170: MatDenseGetArrayWrite((*submat)[i],&ptr);
171: if (irow[i] == icol[i]) { /* same row and column IS? */
172: MatHasCongruentLayouts(A,&flg);
173: if (flg) {
174: ISSorted(irow[i],&flg);
175: if (flg) { /* sorted IS? */
176: it = std::lower_bound(idxr,idxr+nrow,A->rmap->rstart);
177: if (it != idxr+nrow && *it == A->rmap->rstart) { /* rmap->rstart in IS? */
178: if (std::distance(idxr,it) + A->rmap->n <= nrow) { /* long enough IS to store the local diagonal block? */
179: for (PetscInt j=0; j<A->rmap->n && flg; ++j) if (PetscUnlikely(it[j] != A->rmap->rstart+j)) flg = PETSC_FALSE;
180: if (flg) { /* complete local diagonal block in IS? */
181: /* fast extraction when the local diagonal block is part of the submatrix, e.g., for PCASM or PCHPDDM
182: * [ B C E ]
183: * A = [ B D E ]
184: * [ B F E ]
185: */
186: m = std::distance(idxr,it); /* shift of the coefficient (0,0) of block D from above */
187: MatGetDiagonalBlock_Htool(A,&D);
188: MatDenseGetArrayRead(D,©);
189: for (PetscInt k=0; k<A->rmap->n; ++k) {
190: PetscArraycpy(ptr+(m+k)*nrow+m,copy+k*A->rmap->n,A->rmap->n); /* block D from above */
191: }
192: MatDenseRestoreArrayRead(D,©);
193: if (m) {
194: a->wrapper->copy_submatrix(nrow,m,idxr,idxc,ptr); /* vertical block B from above */
195: /* entry-wise assembly may be costly, so transpose already-computed entries when possible */
196: if (A->symmetric || A->hermitian) {
197: MatCreateDense(PETSC_COMM_SELF,A->rmap->n,m,A->rmap->n,m,ptr+m,&B);
198: MatDenseSetLDA(B,nrow);
199: MatCreateDense(PETSC_COMM_SELF,m,A->rmap->n,m,A->rmap->n,ptr+m*nrow,&BT);
200: MatDenseSetLDA(BT,nrow);
201: if (A->hermitian && PetscDefined(USE_COMPLEX)) {
202: MatHermitianTranspose(B,MAT_REUSE_MATRIX,&BT);
203: } else {
204: MatTranspose(B,MAT_REUSE_MATRIX,&BT);
205: }
206: MatDestroy(&B);
207: MatDestroy(&BT);
208: } else {
209: for (PetscInt k=0; k<A->rmap->n; ++k) { /* block C from above */
210: a->wrapper->copy_submatrix(m,1,idxr,idxc+m+k,ptr+(m+k)*nrow);
211: }
212: }
213: }
214: if (m+A->rmap->n != nrow) {
215: a->wrapper->copy_submatrix(nrow,std::distance(it+A->rmap->n,idxr+nrow),idxr,idxc+m+A->rmap->n,ptr+(m+A->rmap->n)*nrow); /* vertical block E from above */
216: /* entry-wise assembly may be costly, so transpose already-computed entries when possible */
217: if (A->symmetric || A->hermitian) {
218: MatCreateDense(PETSC_COMM_SELF,A->rmap->n,nrow-(m+A->rmap->n),A->rmap->n,nrow-(m+A->rmap->n),ptr+(m+A->rmap->n)*nrow+m,&B);
219: MatDenseSetLDA(B,nrow);
220: MatCreateDense(PETSC_COMM_SELF,nrow-(m+A->rmap->n),A->rmap->n,nrow-(m+A->rmap->n),A->rmap->n,ptr+m*nrow+m+A->rmap->n,&BT);
221: MatDenseSetLDA(BT,nrow);
222: if (A->hermitian && PetscDefined(USE_COMPLEX)) {
223: MatHermitianTranspose(B,MAT_REUSE_MATRIX,&BT);
224: } else {
225: MatTranspose(B,MAT_REUSE_MATRIX,&BT);
226: }
227: MatDestroy(&B);
228: MatDestroy(&BT);
229: } else {
230: for (PetscInt k=0; k<A->rmap->n; ++k) { /* block F from above */
231: a->wrapper->copy_submatrix(std::distance(it+A->rmap->n,idxr+nrow),1,it+A->rmap->n,idxc+m+k,ptr+(m+k)*nrow+m+A->rmap->n);
232: }
233: }
234: }
235: } /* complete local diagonal block not in IS */
236: } else flg = PETSC_FALSE; /* IS not long enough to store the local diagonal block */
237: } else flg = PETSC_FALSE; /* rmap->rstart not in IS */
238: } /* unsorted IS */
239: }
240: } else flg = PETSC_FALSE; /* different row and column IS */
241: if (!flg) a->wrapper->copy_submatrix(nrow,m,idxr,idxc,ptr); /* reassemble everything */
242: ISRestoreIndices(irow[i],&idxr);
243: ISRestoreIndices(icol[i],&idxc);
244: MatDenseRestoreArrayWrite((*submat)[i],&ptr);
245: MatAssemblyBegin((*submat)[i],MAT_FINAL_ASSEMBLY);
246: MatAssemblyEnd((*submat)[i],MAT_FINAL_ASSEMBLY);
247: MatScale((*submat)[i],a->s);
248: }
249: return 0;
250: }
252: static PetscErrorCode MatDestroy_Htool(Mat A)
253: {
254: Mat_Htool *a = (Mat_Htool*)A->data;
255: PetscContainer container;
256: MatHtoolKernelTranspose *kernelt;
258: PetscObjectChangeTypeName((PetscObject)A,NULL);
259: PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_htool_seqdense_C",NULL);
260: PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_htool_mpidense_C",NULL);
261: PetscObjectComposeFunction((PetscObject)A,"MatConvert_htool_seqdense_C",NULL);
262: PetscObjectComposeFunction((PetscObject)A,"MatConvert_htool_mpidense_C",NULL);
263: PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetHierarchicalMat_C",NULL);
264: PetscObjectComposeFunction((PetscObject)A,"MatHtoolSetKernel_C",NULL);
265: PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetPermutationSource_C",NULL);
266: PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetPermutationTarget_C",NULL);
267: PetscObjectComposeFunction((PetscObject)A,"MatHtoolUsePermutation_C",NULL);
268: PetscObjectQuery((PetscObject)A,"KernelTranspose",(PetscObject*)&container);
269: if (container) { /* created in MatTranspose_Htool() */
270: PetscContainerGetPointer(container,(void**)&kernelt);
271: MatDestroy(&kernelt->A);
272: PetscFree(kernelt);
273: PetscContainerDestroy(&container);
274: PetscObjectCompose((PetscObject)A,"KernelTranspose",NULL);
275: }
276: if (a->gcoords_source != a->gcoords_target) {
277: PetscFree(a->gcoords_source);
278: }
279: PetscFree(a->gcoords_target);
280: PetscFree2(a->work_source,a->work_target);
281: delete a->wrapper;
282: delete a->hmatrix;
283: PetscFree(A->data);
284: return 0;
285: }
287: static PetscErrorCode MatView_Htool(Mat A,PetscViewer pv)
288: {
289: Mat_Htool *a = (Mat_Htool*)A->data;
290: PetscBool flg;
292: PetscObjectTypeCompare((PetscObject)pv,PETSCVIEWERASCII,&flg);
293: if (flg) {
294: PetscViewerASCIIPrintf(pv,"symmetry: %c\n",a->hmatrix->get_symmetry_type());
295: if (PetscAbsScalar(a->s-1.0) > PETSC_MACHINE_EPSILON) {
296: #if defined(PETSC_USE_COMPLEX)
297: PetscViewerASCIIPrintf(pv,"scaling: %g+%gi\n",(double)PetscRealPart(a->s),(double)PetscImaginaryPart(a->s));
298: #else
299: PetscViewerASCIIPrintf(pv,"scaling: %g\n",(double)a->s);
300: #endif
301: }
302: PetscViewerASCIIPrintf(pv,"minimum cluster size: %" PetscInt_FMT "\n",a->bs[0]);
303: PetscViewerASCIIPrintf(pv,"maximum block size: %" PetscInt_FMT "\n",a->bs[1]);
304: PetscViewerASCIIPrintf(pv,"epsilon: %g\n",(double)a->epsilon);
305: PetscViewerASCIIPrintf(pv,"eta: %g\n",(double)a->eta);
306: PetscViewerASCIIPrintf(pv,"minimum target depth: %" PetscInt_FMT "\n",a->depth[0]);
307: PetscViewerASCIIPrintf(pv,"minimum source depth: %" PetscInt_FMT "\n",a->depth[1]);
308: PetscViewerASCIIPrintf(pv,"compressor: %s\n",MatHtoolCompressorTypes[a->compressor]);
309: PetscViewerASCIIPrintf(pv,"clustering: %s\n",MatHtoolClusteringTypes[a->clustering]);
310: PetscViewerASCIIPrintf(pv,"compression ratio: %s\n",a->hmatrix->get_infos("Compression_ratio").c_str());
311: PetscViewerASCIIPrintf(pv,"space saving: %s\n",a->hmatrix->get_infos("Space_saving").c_str());
312: PetscViewerASCIIPrintf(pv,"number of dense (resp. low rank) matrices: %s (resp. %s)\n",a->hmatrix->get_infos("Number_of_dmat").c_str(),a->hmatrix->get_infos("Number_of_lrmat").c_str());
313: PetscViewerASCIIPrintf(pv,"(minimum, mean, maximum) dense block sizes: (%s, %s, %s)\n",a->hmatrix->get_infos("Dense_block_size_min").c_str(),a->hmatrix->get_infos("Dense_block_size_mean").c_str(),a->hmatrix->get_infos("Dense_block_size_max").c_str());
314: PetscViewerASCIIPrintf(pv,"(minimum, mean, maximum) low rank block sizes: (%s, %s, %s)\n",a->hmatrix->get_infos("Low_rank_block_size_min").c_str(),a->hmatrix->get_infos("Low_rank_block_size_mean").c_str(),a->hmatrix->get_infos("Low_rank_block_size_max").c_str());
315: PetscViewerASCIIPrintf(pv,"(minimum, mean, maximum) ranks: (%s, %s, %s)\n",a->hmatrix->get_infos("Rank_min").c_str(),a->hmatrix->get_infos("Rank_mean").c_str(),a->hmatrix->get_infos("Rank_max").c_str());
316: }
317: return 0;
318: }
320: static PetscErrorCode MatScale_Htool(Mat A,PetscScalar s)
321: {
322: Mat_Htool *a = (Mat_Htool*)A->data;
324: a->s *= s;
325: return 0;
326: }
328: /* naive implementation of MatGetRow() needed for MatConvert_Nest_AIJ() */
329: static PetscErrorCode MatGetRow_Htool(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
330: {
331: Mat_Htool *a = (Mat_Htool*)A->data;
332: PetscInt *idxc;
333: PetscBLASInt one = 1,bn;
335: if (nz) *nz = A->cmap->N;
336: if (idx || v) { /* even if !idx, need to set idxc for htool::copy_submatrix() */
337: PetscMalloc1(A->cmap->N,&idxc);
338: for (PetscInt i=0; i<A->cmap->N; ++i) idxc[i] = i;
339: }
340: if (idx) *idx = idxc;
341: if (v) {
342: PetscMalloc1(A->cmap->N,v);
343: if (a->wrapper) a->wrapper->copy_submatrix(1,A->cmap->N,&row,idxc,*v);
344: else reinterpret_cast<htool::VirtualGenerator<PetscScalar>*>(a->kernelctx)->copy_submatrix(1,A->cmap->N,&row,idxc,*v);
345: PetscBLASIntCast(A->cmap->N,&bn);
346: PetscStackCallBLAS("BLASscal",BLASscal_(&bn,&a->s,*v,&one));
347: }
348: if (!idx) {
349: PetscFree(idxc);
350: }
351: return 0;
352: }
354: static PetscErrorCode MatRestoreRow_Htool(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
355: {
356: if (nz) *nz = 0;
357: if (idx) {
358: PetscFree(*idx);
359: }
360: if (v) {
361: PetscFree(*v);
362: }
363: return 0;
364: }
366: static PetscErrorCode MatSetFromOptions_Htool(PetscOptionItems *PetscOptionsObject,Mat A)
367: {
368: Mat_Htool *a = (Mat_Htool*)A->data;
369: PetscInt n;
370: PetscBool flg;
372: PetscOptionsHead(PetscOptionsObject,"Htool options");
373: PetscOptionsInt("-mat_htool_min_cluster_size","Minimal leaf size in cluster tree",NULL,a->bs[0],a->bs,NULL);
374: PetscOptionsInt("-mat_htool_max_block_size","Maximal number of coefficients in a dense block",NULL,a->bs[1],a->bs + 1,NULL);
375: PetscOptionsReal("-mat_htool_epsilon","Relative error in Frobenius norm when approximating a block",NULL,a->epsilon,&a->epsilon,NULL);
376: PetscOptionsReal("-mat_htool_eta","Admissibility condition tolerance",NULL,a->eta,&a->eta,NULL);
377: PetscOptionsInt("-mat_htool_min_target_depth","Minimal cluster tree depth associated with the rows",NULL,a->depth[0],a->depth,NULL);
378: PetscOptionsInt("-mat_htool_min_source_depth","Minimal cluster tree depth associated with the columns",NULL,a->depth[1],a->depth + 1,NULL);
379: n = 0;
380: PetscOptionsEList("-mat_htool_compressor","Type of compression","MatHtoolCompressorType",MatHtoolCompressorTypes,ALEN(MatHtoolCompressorTypes),MatHtoolCompressorTypes[MAT_HTOOL_COMPRESSOR_SYMPARTIAL_ACA],&n,&flg);
381: if (flg) a->compressor = MatHtoolCompressorType(n);
382: n = 0;
383: PetscOptionsEList("-mat_htool_clustering","Type of clustering","MatHtoolClusteringType",MatHtoolClusteringTypes,ALEN(MatHtoolClusteringTypes),MatHtoolClusteringTypes[MAT_HTOOL_CLUSTERING_PCA_REGULAR],&n,&flg);
384: if (flg) a->clustering = MatHtoolClusteringType(n);
385: PetscOptionsTail();
386: return 0;
387: }
389: static PetscErrorCode MatAssemblyEnd_Htool(Mat A,MatAssemblyType type)
390: {
391: Mat_Htool *a = (Mat_Htool*)A->data;
392: const PetscInt *ranges;
393: PetscInt *offset;
394: PetscMPIInt size;
395: char S = PetscDefined(USE_COMPLEX) && A->hermitian ? 'H' : (A->symmetric ? 'S' : 'N'),uplo = S == 'N' ? 'N' : 'U';
396: htool::VirtualGenerator<PetscScalar> *generator = nullptr;
397: std::shared_ptr<htool::VirtualCluster> t,s = nullptr;
398: std::shared_ptr<htool::VirtualLowRankGenerator<PetscScalar>> compressor = nullptr;
400: PetscCitationsRegister(HtoolCitation,&HtoolCite);
401: delete a->wrapper;
402: delete a->hmatrix;
403: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
404: PetscMalloc1(2*size,&offset);
405: MatGetOwnershipRanges(A,&ranges);
406: for (PetscInt i=0; i<size; ++i) {
407: offset[2*i] = ranges[i];
408: offset[2*i+1] = ranges[i+1] - ranges[i];
409: }
410: switch (a->clustering) {
411: case MAT_HTOOL_CLUSTERING_PCA_GEOMETRIC:
412: t = std::make_shared<htool::Cluster<htool::PCA<htool::SplittingTypes::GeometricSplitting>>>(a->dim);
413: break;
414: case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_GEOMETRIC:
415: t = std::make_shared<htool::Cluster<htool::BoundingBox1<htool::SplittingTypes::GeometricSplitting>>>(a->dim);
416: break;
417: case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_REGULAR:
418: t = std::make_shared<htool::Cluster<htool::BoundingBox1<htool::SplittingTypes::RegularSplitting>>>(a->dim);
419: break;
420: default:
421: t = std::make_shared<htool::Cluster<htool::PCA<htool::SplittingTypes::RegularSplitting>>>(a->dim);
422: }
423: t->set_minclustersize(a->bs[0]);
424: t->build(A->rmap->N,a->gcoords_target,offset);
425: if (a->kernel) a->wrapper = new WrapperHtool(A->rmap->N,A->cmap->N,a->dim,a->kernel,a->kernelctx);
426: else {
427: a->wrapper = NULL;
428: generator = reinterpret_cast<htool::VirtualGenerator<PetscScalar>*>(a->kernelctx);
429: }
430: if (a->gcoords_target != a->gcoords_source) {
431: MatGetOwnershipRangesColumn(A,&ranges);
432: for (PetscInt i=0; i<size; ++i) {
433: offset[2*i] = ranges[i];
434: offset[2*i+1] = ranges[i+1] - ranges[i];
435: }
436: switch (a->clustering) {
437: case MAT_HTOOL_CLUSTERING_PCA_GEOMETRIC:
438: s = std::make_shared<htool::Cluster<htool::PCA<htool::SplittingTypes::GeometricSplitting>>>(a->dim);
439: break;
440: case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_GEOMETRIC:
441: s = std::make_shared<htool::Cluster<htool::BoundingBox1<htool::SplittingTypes::GeometricSplitting>>>(a->dim);
442: break;
443: case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_REGULAR:
444: s = std::make_shared<htool::Cluster<htool::BoundingBox1<htool::SplittingTypes::RegularSplitting>>>(a->dim);
445: break;
446: default:
447: s = std::make_shared<htool::Cluster<htool::PCA<htool::SplittingTypes::RegularSplitting>>>(a->dim);
448: }
449: s->set_minclustersize(a->bs[0]);
450: s->build(A->cmap->N,a->gcoords_source,offset);
451: S = uplo = 'N';
452: }
453: PetscFree(offset);
454: switch (a->compressor) {
455: case MAT_HTOOL_COMPRESSOR_FULL_ACA:
456: compressor = std::make_shared<htool::fullACA<PetscScalar>>();
457: break;
458: case MAT_HTOOL_COMPRESSOR_SVD:
459: compressor = std::make_shared<htool::SVD<PetscScalar>>();
460: break;
461: default:
462: compressor = std::make_shared<htool::sympartialACA<PetscScalar>>();
463: }
464: a->hmatrix = dynamic_cast<htool::VirtualHMatrix<PetscScalar>*>(new htool::HMatrix<PetscScalar>(t,s ? s : t,a->epsilon,a->eta,S,uplo));
465: a->hmatrix->set_compression(compressor);
466: a->hmatrix->set_maxblocksize(a->bs[1]);
467: a->hmatrix->set_mintargetdepth(a->depth[0]);
468: a->hmatrix->set_minsourcedepth(a->depth[1]);
469: if (s) a->hmatrix->build(a->wrapper ? *a->wrapper : *generator,a->gcoords_target,a->gcoords_source);
470: else a->hmatrix->build(a->wrapper ? *a->wrapper : *generator,a->gcoords_target);
471: return 0;
472: }
474: static PetscErrorCode MatProductNumeric_Htool(Mat C)
475: {
476: Mat_Product *product = C->product;
477: Mat_Htool *a = (Mat_Htool*)product->A->data;
478: const PetscScalar *in;
479: PetscScalar *out;
480: PetscInt N,lda;
482: MatCheckProduct(C,1);
483: MatGetSize(C,NULL,&N);
484: MatDenseGetLDA(C,&lda);
486: MatDenseGetArrayRead(product->B,&in);
487: MatDenseGetArrayWrite(C,&out);
488: switch (product->type) {
489: case MATPRODUCT_AB:
490: a->hmatrix->mvprod_local_to_local(in,out,N);
491: break;
492: case MATPRODUCT_AtB:
493: a->hmatrix->mvprod_transp_local_to_local(in,out,N);
494: break;
495: default:
496: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatProductType %s is not supported",MatProductTypes[product->type]);
497: }
498: MatDenseRestoreArrayWrite(C,&out);
499: MatDenseRestoreArrayRead(product->B,&in);
500: MatScale(C,a->s);
501: return 0;
502: }
504: static PetscErrorCode MatProductSymbolic_Htool(Mat C)
505: {
506: Mat_Product *product = C->product;
507: Mat A,B;
508: PetscBool flg;
510: MatCheckProduct(C,1);
511: A = product->A;
512: B = product->B;
513: PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,"");
515: switch (product->type) {
516: case MATPRODUCT_AB:
517: if (C->rmap->n == PETSC_DECIDE || C->cmap->n == PETSC_DECIDE || C->rmap->N == PETSC_DECIDE || C->cmap->N == PETSC_DECIDE) {
518: MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);
519: }
520: break;
521: case MATPRODUCT_AtB:
522: if (C->rmap->n == PETSC_DECIDE || C->cmap->n == PETSC_DECIDE || C->rmap->N == PETSC_DECIDE || C->cmap->N == PETSC_DECIDE) {
523: MatSetSizes(C,A->cmap->n,B->cmap->n,A->cmap->N,B->cmap->N);
524: }
525: break;
526: default:
527: SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"ProductType %s is not supported",MatProductTypes[product->type]);
528: }
529: MatSetType(C,MATDENSE);
530: MatSetUp(C);
531: MatSetOption(C,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
532: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
533: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
534: C->ops->productsymbolic = NULL;
535: C->ops->productnumeric = MatProductNumeric_Htool;
536: return 0;
537: }
539: static PetscErrorCode MatProductSetFromOptions_Htool(Mat C)
540: {
541: MatCheckProduct(C,1);
542: if (C->product->type == MATPRODUCT_AB || C->product->type == MATPRODUCT_AtB) C->ops->productsymbolic = MatProductSymbolic_Htool;
543: return 0;
544: }
546: static PetscErrorCode MatHtoolGetHierarchicalMat_Htool(Mat A,const htool::VirtualHMatrix<PetscScalar> **hmatrix)
547: {
548: Mat_Htool *a = (Mat_Htool*)A->data;
550: *hmatrix = a->hmatrix;
551: return 0;
552: }
554: /*@C
555: MatHtoolGetHierarchicalMat - Retrieves the opaque pointer to a Htool virtual matrix stored in a MATHTOOL.
557: Input Parameter:
558: . A - hierarchical matrix
560: Output Parameter:
561: . hmatrix - opaque pointer to a Htool virtual matrix
563: Level: advanced
565: .seealso: MATHTOOL
566: @*/
567: PETSC_EXTERN PetscErrorCode MatHtoolGetHierarchicalMat(Mat A,const htool::VirtualHMatrix<PetscScalar> **hmatrix)
568: {
571: PetscTryMethod(A,"MatHtoolGetHierarchicalMat_C",(Mat,const htool::VirtualHMatrix<PetscScalar>**),(A,hmatrix));
572: return 0;
573: }
575: static PetscErrorCode MatHtoolSetKernel_Htool(Mat A,MatHtoolKernel kernel,void *kernelctx)
576: {
577: Mat_Htool *a = (Mat_Htool*)A->data;
579: a->kernel = kernel;
580: a->kernelctx = kernelctx;
581: delete a->wrapper;
582: if (a->kernel) a->wrapper = new WrapperHtool(A->rmap->N,A->cmap->N,a->dim,a->kernel,a->kernelctx);
583: return 0;
584: }
586: /*@C
587: MatHtoolSetKernel - Sets the kernel and context used for the assembly of a MATHTOOL.
589: Input Parameters:
590: + A - hierarchical matrix
591: . kernel - computational kernel (or NULL)
592: - kernelctx - kernel context (if kernel is NULL, the pointer must be of type htool::VirtualGenerator<PetscScalar>*)
594: Level: advanced
596: .seealso: MATHTOOL, MatCreateHtoolFromKernel()
597: @*/
598: PETSC_EXTERN PetscErrorCode MatHtoolSetKernel(Mat A,MatHtoolKernel kernel,void *kernelctx)
599: {
603: PetscTryMethod(A,"MatHtoolSetKernel_C",(Mat,MatHtoolKernel,void*),(A,kernel,kernelctx));
604: return 0;
605: }
607: static PetscErrorCode MatHtoolGetPermutationSource_Htool(Mat A,IS* is)
608: {
609: Mat_Htool *a = (Mat_Htool*)A->data;
610: std::vector<PetscInt> source;
612: source = a->hmatrix->get_source_cluster()->get_local_perm();
613: ISCreateGeneral(PetscObjectComm((PetscObject)A),source.size(),source.data(),PETSC_COPY_VALUES,is);
614: ISSetPermutation(*is);
615: return 0;
616: }
618: /*@C
619: MatHtoolGetPermutationSource - Gets the permutation associated to the source cluster.
621: Input Parameter:
622: . A - hierarchical matrix
624: Output Parameter:
625: . is - permutation
627: Level: advanced
629: .seealso: MATHTOOL, MatHtoolGetPermutationTarget(), MatHtoolUsePermutation()
630: @*/
631: PETSC_EXTERN PetscErrorCode MatHtoolGetPermutationSource(Mat A,IS* is)
632: {
635: PetscTryMethod(A,"MatHtoolGetPermutationSource_C",(Mat,IS*),(A,is));
636: return 0;
637: }
639: static PetscErrorCode MatHtoolGetPermutationTarget_Htool(Mat A,IS* is)
640: {
641: Mat_Htool *a = (Mat_Htool*)A->data;
642: std::vector<PetscInt> target;
644: target = a->hmatrix->get_target_cluster()->get_local_perm();
645: ISCreateGeneral(PetscObjectComm((PetscObject)A),target.size(),target.data(),PETSC_COPY_VALUES,is);
646: ISSetPermutation(*is);
647: return 0;
648: }
650: /*@C
651: MatHtoolGetPermutationTarget - Gets the permutation associated to the target cluster.
653: Input Parameter:
654: . A - hierarchical matrix
656: Output Parameter:
657: . is - permutation
659: Level: advanced
661: .seealso: MATHTOOL, MatHtoolGetPermutationSource(), MatHtoolUsePermutation()
662: @*/
663: PETSC_EXTERN PetscErrorCode MatHtoolGetPermutationTarget(Mat A,IS* is)
664: {
667: PetscTryMethod(A,"MatHtoolGetPermutationTarget_C",(Mat,IS*),(A,is));
668: return 0;
669: }
671: static PetscErrorCode MatHtoolUsePermutation_Htool(Mat A,PetscBool use)
672: {
673: Mat_Htool *a = (Mat_Htool*)A->data;
675: a->hmatrix->set_use_permutation(use);
676: return 0;
677: }
679: /*@C
680: MatHtoolUsePermutation - Sets whether MATHTOOL should permute input (resp. output) vectors following its internal source (resp. target) permutation.
682: Input Parameters:
683: + A - hierarchical matrix
684: - use - Boolean value
686: Level: advanced
688: .seealso: MATHTOOL, MatHtoolGetPermutationSource(), MatHtoolGetPermutationTarget()
689: @*/
690: PETSC_EXTERN PetscErrorCode MatHtoolUsePermutation(Mat A,PetscBool use)
691: {
694: PetscTryMethod(A,"MatHtoolUsePermutation_C",(Mat,PetscBool),(A,use));
695: return 0;
696: }
698: static PetscErrorCode MatConvert_Htool_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
699: {
700: Mat C;
701: Mat_Htool *a = (Mat_Htool*)A->data;
702: PetscInt lda;
703: PetscScalar *array;
705: if (reuse == MAT_REUSE_MATRIX) {
706: C = *B;
708: MatDenseGetLDA(C,&lda);
710: } else {
711: MatCreate(PetscObjectComm((PetscObject)A),&C);
712: MatSetSizes(C,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
713: MatSetType(C,MATDENSE);
714: MatSetUp(C);
715: MatSetOption(C,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
716: }
717: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
718: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
719: MatDenseGetArrayWrite(C,&array);
720: a->hmatrix->copy_local_dense_perm(array);
721: MatDenseRestoreArrayWrite(C,&array);
722: MatScale(C,a->s);
723: if (reuse == MAT_INPLACE_MATRIX) {
724: MatHeaderReplace(A,&C);
725: } else *B = C;
726: return 0;
727: }
729: static PetscErrorCode GenEntriesTranspose(PetscInt sdim,PetscInt M,PetscInt N,const PetscInt *rows,const PetscInt *cols,PetscScalar *ptr,void *ctx)
730: {
731: MatHtoolKernelTranspose *generator = (MatHtoolKernelTranspose*)ctx;
732: PetscScalar *tmp;
734: generator->kernel(sdim,N,M,cols,rows,ptr,generator->kernelctx);
735: PetscMalloc1(M*N,&tmp);
736: PetscArraycpy(tmp,ptr,M*N);
737: for (PetscInt i=0; i<M; ++i) {
738: for (PetscInt j=0; j<N; ++j) ptr[i+j*M] = tmp[j+i*N];
739: }
740: PetscFree(tmp);
741: return 0;
742: }
744: /* naive implementation which keeps a reference to the original Mat */
745: static PetscErrorCode MatTranspose_Htool(Mat A,MatReuse reuse,Mat *B)
746: {
747: Mat C;
748: Mat_Htool *a = (Mat_Htool*)A->data,*c;
749: PetscInt M = A->rmap->N,N = A->cmap->N,m = A->rmap->n,n = A->cmap->n;
750: PetscContainer container;
751: MatHtoolKernelTranspose *kernelt;
754: if (reuse == MAT_INITIAL_MATRIX) {
755: MatCreate(PetscObjectComm((PetscObject)A),&C);
756: MatSetSizes(C,n,m,N,M);
757: MatSetType(C,((PetscObject)A)->type_name);
758: MatSetUp(C);
759: PetscContainerCreate(PetscObjectComm((PetscObject)C),&container);
760: PetscNew(&kernelt);
761: PetscContainerSetPointer(container,kernelt);
762: PetscObjectCompose((PetscObject)C,"KernelTranspose",(PetscObject)container);
763: } else {
764: C = *B;
765: PetscObjectQuery((PetscObject)C,"KernelTranspose",(PetscObject*)&container);
767: PetscContainerGetPointer(container,(void**)&kernelt);
768: }
769: c = (Mat_Htool*)C->data;
770: c->dim = a->dim;
771: c->s = a->s;
772: c->kernel = GenEntriesTranspose;
773: if (kernelt->A != A) {
774: MatDestroy(&kernelt->A);
775: kernelt->A = A;
776: PetscObjectReference((PetscObject)A);
777: }
778: kernelt->kernel = a->kernel;
779: kernelt->kernelctx = a->kernelctx;
780: c->kernelctx = kernelt;
781: if (reuse == MAT_INITIAL_MATRIX) {
782: PetscMalloc1(N*c->dim,&c->gcoords_target);
783: PetscArraycpy(c->gcoords_target,a->gcoords_source,N*c->dim);
784: if (a->gcoords_target != a->gcoords_source) {
785: PetscMalloc1(M*c->dim,&c->gcoords_source);
786: PetscArraycpy(c->gcoords_source,a->gcoords_target,M*c->dim);
787: } else c->gcoords_source = c->gcoords_target;
788: PetscCalloc2(M,&c->work_source,N,&c->work_target);
789: }
790: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
791: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
792: if (reuse == MAT_INITIAL_MATRIX) *B = C;
793: return 0;
794: }
796: /*@C
797: MatCreateHtoolFromKernel - Creates a MATHTOOL from a user-supplied kernel.
799: Input Parameters:
800: + comm - MPI communicator
801: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
802: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
803: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
804: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
805: . spacedim - dimension of the space coordinates
806: . coords_target - coordinates of the target
807: . coords_source - coordinates of the source
808: . kernel - computational kernel (or NULL)
809: - kernelctx - kernel context (if kernel is NULL, the pointer must be of type htool::VirtualGenerator<PetscScalar>*)
811: Output Parameter:
812: . B - matrix
814: Options Database Keys:
815: + -mat_htool_min_cluster_size <PetscInt> - minimal leaf size in cluster tree
816: . -mat_htool_max_block_size <PetscInt> - maximal number of coefficients in a dense block
817: . -mat_htool_epsilon <PetscReal> - relative error in Frobenius norm when approximating a block
818: . -mat_htool_eta <PetscReal> - admissibility condition tolerance
819: . -mat_htool_min_target_depth <PetscInt> - minimal cluster tree depth associated with the rows
820: . -mat_htool_min_source_depth <PetscInt> - minimal cluster tree depth associated with the columns
821: . -mat_htool_compressor <sympartialACA, fullACA, SVD> - type of compression
822: - -mat_htool_clustering <PCARegular, PCAGeometric, BounbingBox1Regular, BoundingBox1Geometric> - type of clustering
824: Level: intermediate
826: .seealso: MatCreate(), MATHTOOL, PCSetCoordinates(), MatHtoolSetKernel(), MatHtoolCompressorType, MATH2OPUS, MatCreateH2OpusFromKernel()
827: @*/
828: PetscErrorCode MatCreateHtoolFromKernel(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt spacedim,const PetscReal coords_target[],const PetscReal coords_source[],MatHtoolKernel kernel,void *kernelctx,Mat *B)
829: {
830: Mat A;
831: Mat_Htool *a;
833: MatCreate(comm,&A);
839: MatSetSizes(A,m,n,M,N);
840: MatSetType(A,MATHTOOL);
841: MatSetUp(A);
842: a = (Mat_Htool*)A->data;
843: a->dim = spacedim;
844: a->s = 1.0;
845: a->kernel = kernel;
846: a->kernelctx = kernelctx;
847: PetscCalloc1(A->rmap->N*spacedim,&a->gcoords_target);
848: PetscArraycpy(a->gcoords_target+A->rmap->rstart*spacedim,coords_target,A->rmap->n*spacedim);
849: MPIU_Allreduce(MPI_IN_PLACE,a->gcoords_target,A->rmap->N*spacedim,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)A)); /* global target coordinates */
850: if (coords_target != coords_source) {
851: PetscCalloc1(A->cmap->N*spacedim,&a->gcoords_source);
852: PetscArraycpy(a->gcoords_source+A->cmap->rstart*spacedim,coords_source,A->cmap->n*spacedim);
853: MPIU_Allreduce(MPI_IN_PLACE,a->gcoords_source,A->cmap->N*spacedim,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)A)); /* global source coordinates */
854: } else a->gcoords_source = a->gcoords_target;
855: PetscCalloc2(A->cmap->N,&a->work_source,A->rmap->N,&a->work_target);
856: *B = A;
857: return 0;
858: }
860: /*MC
861: MATHTOOL = "htool" - A matrix type for hierarchical matrices using the Htool package.
863: Use ./configure --download-htool to install PETSc to use Htool.
865: Options Database Keys:
866: . -mat_type htool - matrix type to "htool" during a call to MatSetFromOptions()
868: Level: beginner
870: .seealso: MATH2OPUS, MATDENSE, MatCreateHtoolFromKernel(), MatHtoolSetKernel()
871: M*/
872: PETSC_EXTERN PetscErrorCode MatCreate_Htool(Mat A)
873: {
874: Mat_Htool *a;
876: PetscNewLog(A,&a);
877: A->data = (void*)a;
878: PetscObjectChangeTypeName((PetscObject)A,MATHTOOL);
879: PetscMemzero(A->ops,sizeof(struct _MatOps));
880: A->ops->getdiagonal = MatGetDiagonal_Htool;
881: A->ops->getdiagonalblock = MatGetDiagonalBlock_Htool;
882: A->ops->mult = MatMult_Htool;
883: A->ops->multadd = MatMultAdd_Htool;
884: A->ops->multtranspose = MatMultTranspose_Htool;
885: if (!PetscDefined(USE_COMPLEX)) A->ops->multhermitiantranspose = MatMultTranspose_Htool;
886: A->ops->increaseoverlap = MatIncreaseOverlap_Htool;
887: A->ops->createsubmatrices = MatCreateSubMatrices_Htool;
888: A->ops->transpose = MatTranspose_Htool;
889: A->ops->destroy = MatDestroy_Htool;
890: A->ops->view = MatView_Htool;
891: A->ops->setfromoptions = MatSetFromOptions_Htool;
892: A->ops->scale = MatScale_Htool;
893: A->ops->getrow = MatGetRow_Htool;
894: A->ops->restorerow = MatRestoreRow_Htool;
895: A->ops->assemblyend = MatAssemblyEnd_Htool;
896: a->dim = 0;
897: a->gcoords_target = NULL;
898: a->gcoords_source = NULL;
899: a->s = 1.0;
900: a->bs[0] = 10;
901: a->bs[1] = 1000000;
902: a->epsilon = PetscSqrtReal(PETSC_SMALL);
903: a->eta = 10.0;
904: a->depth[0] = 0;
905: a->depth[1] = 0;
906: a->compressor = MAT_HTOOL_COMPRESSOR_SYMPARTIAL_ACA;
907: PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_htool_seqdense_C",MatProductSetFromOptions_Htool);
908: PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_htool_mpidense_C",MatProductSetFromOptions_Htool);
909: PetscObjectComposeFunction((PetscObject)A,"MatConvert_htool_seqdense_C",MatConvert_Htool_Dense);
910: PetscObjectComposeFunction((PetscObject)A,"MatConvert_htool_mpidense_C",MatConvert_Htool_Dense);
911: PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetHierarchicalMat_C",MatHtoolGetHierarchicalMat_Htool);
912: PetscObjectComposeFunction((PetscObject)A,"MatHtoolSetKernel_C",MatHtoolSetKernel_Htool);
913: PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetPermutationSource_C",MatHtoolGetPermutationSource_Htool);
914: PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetPermutationTarget_C",MatHtoolGetPermutationTarget_Htool);
915: PetscObjectComposeFunction((PetscObject)A,"MatHtoolUsePermutation_C",MatHtoolUsePermutation_Htool);
916: return 0;
917: }