Actual source code: htool.cxx
1: #include <../src/mat/impls/htool/htool.hpp>
2: #include <petscblaslapack.h>
3: #include <set>
5: #define ALEN(a) (sizeof(a)/sizeof((a)[0]))
6: const char *const MatHtoolCompressorTypes[] = { "sympartialACA", "fullACA", "SVD" };
7: const char *const MatHtoolClusteringTypes[] = { "PCARegular", "PCAGeometric", "BoundingBox1Regular", "BoundingBox1Geometric" };
8: const char HtoolCitation[] = "@article{marchand2020two,\n"
9: " Author = {Marchand, Pierre and Claeys, Xavier and Jolivet, Pierre and Nataf, Fr\\'ed\\'eric and Tournier, Pierre-Henri},\n"
10: " Title = {Two-level preconditioning for $h$-version boundary element approximation of hypersingular operator with {GenEO}},\n"
11: " Year = {2020},\n"
12: " Publisher = {Elsevier},\n"
13: " Journal = {Numerische Mathematik},\n"
14: " Volume = {146},\n"
15: " Pages = {597--628},\n"
16: " Url = {https://github.com/htool-ddm/htool}\n"
17: "}\n";
18: static PetscBool HtoolCite = PETSC_FALSE;
20: static PetscErrorCode MatGetDiagonal_Htool(Mat A,Vec v)
21: {
22: Mat_Htool *a = (Mat_Htool*)A->data;
23: PetscScalar *x;
24: PetscBool flg;
28: MatHasCongruentLayouts(A,&flg);
29: if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only congruent layouts supported");
30: VecGetArrayWrite(v,&x);
31: a->hmatrix->copy_local_diagonal(x);
32: VecRestoreArrayWrite(v,&x);
33: VecScale(v,a->s);
34: return(0);
35: }
37: static PetscErrorCode MatGetDiagonalBlock_Htool(Mat A,Mat *b)
38: {
39: Mat_Htool *a = (Mat_Htool*)A->data;
40: Mat B;
41: PetscScalar *ptr;
42: PetscBool flg;
46: MatHasCongruentLayouts(A,&flg);
47: if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only congruent layouts supported");
48: PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B); /* same logic as in MatGetDiagonalBlock_MPIDense() */
49: if (!B) {
50: MatCreateDense(PETSC_COMM_SELF,A->rmap->n,A->rmap->n,A->rmap->n,A->rmap->n,NULL,&B);
51: MatDenseGetArrayWrite(B,&ptr);
52: a->hmatrix->copy_local_diagonal_block(ptr);
53: MatDenseRestoreArrayWrite(B,&ptr);
54: MatPropagateSymmetryOptions(A,B);
55: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
56: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
57: MatScale(B,a->s);
58: PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
59: *b = B;
60: MatDestroy(&B);
61: } else *b = B;
62: return(0);
63: }
65: static PetscErrorCode MatMult_Htool(Mat A,Vec x,Vec y)
66: {
67: Mat_Htool *a = (Mat_Htool*)A->data;
68: const PetscScalar *in;
69: PetscScalar *out;
70: PetscErrorCode ierr;
73: VecGetArrayRead(x,&in);
74: VecGetArrayWrite(y,&out);
75: a->hmatrix->mvprod_local_to_local(in,out);
76: VecRestoreArrayRead(x,&in);
77: VecRestoreArrayWrite(y,&out);
78: VecScale(y,a->s);
79: return(0);
80: }
82: /* naive implementation of MatMultAdd() needed for FEM-BEM coupling via MATNEST */
83: static PetscErrorCode MatMultAdd_Htool(Mat A,Vec v1,Vec v2,Vec v3)
84: {
85: Mat_Htool *a = (Mat_Htool*)A->data;
86: Vec tmp;
87: const PetscScalar scale = a->s;
88: PetscErrorCode ierr;
91: VecDuplicate(v2,&tmp);
92: VecCopy(v2,v3); /* no-op in MatMultAdd(bA->m[i][j],bx[j],by[i],by[i]) since VecCopy() checks for x == y */
93: a->s = 1.0; /* set s to 1.0 since VecAXPY() may be used to scale the MatMult() output Vec */
94: MatMult_Htool(A,v1,tmp);
95: VecAXPY(v3,scale,tmp);
96: VecDestroy(&tmp);
97: a->s = scale; /* set s back to its original value */
98: return(0);
99: }
101: static PetscErrorCode MatIncreaseOverlap_Htool(Mat A,PetscInt is_max,IS is[],PetscInt ov)
102: {
103: std::set<PetscInt> set;
104: const PetscInt *idx;
105: PetscInt *oidx,size;
106: PetscMPIInt csize;
107: PetscErrorCode ierr;
110: for (PetscInt i=0; i<is_max; ++i) {
111: /* basic implementation that adds indices by shifting an IS by -ov, -ov+1..., -1, 1..., ov-1, ov */
112: /* needed to avoid subdomain matrices to replicate A since it is dense */
113: MPI_Comm_size(PetscObjectComm((PetscObject)is[i]),&csize);
114: if (csize != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported parallel IS");
115: ISGetSize(is[i],&size);
116: ISGetIndices(is[i],&idx);
117: for (PetscInt j=0; j<size; ++j) {
118: set.insert(idx[j]);
119: for (PetscInt k=1; k<=ov; ++k) { /* for each layer of overlap */
120: if (idx[j] - k >= 0) set.insert(idx[j] - k); /* do not insert negative indices */
121: if (idx[j] + k < A->rmap->N && idx[j] + k < A->cmap->N) set.insert(idx[j] + k); /* do not insert indices greater than the dimension of A */
122: }
123: }
124: ISRestoreIndices(is[i],&idx);
125: ISDestroy(is+i);
126: size = set.size(); /* size with overlap */
127: PetscMalloc1(size,&oidx);
128: for (const PetscInt j : set) *oidx++ = j;
129: oidx -= size;
130: ISCreateGeneral(PETSC_COMM_SELF,size,oidx,PETSC_OWN_POINTER,is+i);
131: }
132: return(0);
133: }
135: static PetscErrorCode MatCreateSubMatrices_Htool(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
136: {
137: Mat_Htool *a = (Mat_Htool*)A->data;
138: Mat D,B,BT;
139: const PetscScalar *copy;
140: PetscScalar *ptr;
141: const PetscInt *idxr,*idxc,*it;
142: PetscInt nrow,m,i;
143: PetscBool flg;
144: PetscErrorCode ierr;
147: if (scall != MAT_REUSE_MATRIX) {
148: PetscCalloc1(n,submat);
149: }
150: for (i=0; i<n; ++i) {
151: ISGetLocalSize(irow[i],&nrow);
152: ISGetLocalSize(icol[i],&m);
153: ISGetIndices(irow[i],&idxr);
154: ISGetIndices(icol[i],&idxc);
155: if (scall != MAT_REUSE_MATRIX) {
156: MatCreateDense(PETSC_COMM_SELF,nrow,m,nrow,m,NULL,(*submat)+i);
157: }
158: MatDenseGetArrayWrite((*submat)[i],&ptr);
159: if (irow[i] == icol[i]) { /* same row and column IS? */
160: MatHasCongruentLayouts(A,&flg);
161: if (flg) {
162: ISSorted(irow[i],&flg);
163: if (flg) { /* sorted IS? */
164: it = std::lower_bound(idxr,idxr+nrow,A->rmap->rstart);
165: if (it != idxr+nrow && *it == A->rmap->rstart) { /* rmap->rstart in IS? */
166: if (std::distance(idxr,it) + A->rmap->n <= nrow) { /* long enough IS to store the local diagonal block? */
167: for (PetscInt j=0; j<A->rmap->n && flg; ++j) if (PetscUnlikely(it[j] != A->rmap->rstart+j)) flg = PETSC_FALSE;
168: if (flg) { /* complete local diagonal block in IS? */
169: /* fast extraction when the local diagonal block is part of the submatrix, e.g., for PCASM or PCHPDDM
170: * [ B C E ]
171: * A = [ B D E ]
172: * [ B F E ]
173: */
174: m = std::distance(idxr,it); /* shift of the coefficient (0,0) of block D from above */
175: MatGetDiagonalBlock_Htool(A,&D);
176: MatDenseGetArrayRead(D,©);
177: for (PetscInt k=0; k<A->rmap->n; ++k) {
178: PetscArraycpy(ptr+(m+k)*nrow+m,copy+k*A->rmap->n,A->rmap->n); /* block D from above */
179: }
180: MatDenseRestoreArrayRead(D,©);
181: if (m) {
182: a->wrapper->copy_submatrix(nrow,m,idxr,idxc,ptr); /* vertical block B from above */
183: /* entry-wise assembly may be costly, so transpose already-computed entries when possible */
184: if (A->symmetric || A->hermitian) {
185: MatCreateDense(PETSC_COMM_SELF,A->rmap->n,m,A->rmap->n,m,ptr+m,&B);
186: MatDenseSetLDA(B,nrow);
187: MatCreateDense(PETSC_COMM_SELF,m,A->rmap->n,m,A->rmap->n,ptr+m*nrow,&BT);
188: MatDenseSetLDA(BT,nrow);
189: if (A->hermitian && PetscDefined(USE_COMPLEX)) {
190: MatHermitianTranspose(B,MAT_REUSE_MATRIX,&BT);
191: } else {
192: MatTranspose(B,MAT_REUSE_MATRIX,&BT);
193: }
194: MatDestroy(&B);
195: MatDestroy(&BT);
196: } else {
197: for (PetscInt k=0; k<A->rmap->n; ++k) { /* block C from above */
198: a->wrapper->copy_submatrix(m,1,idxr,idxc+m+k,ptr+(m+k)*nrow);
199: }
200: }
201: }
202: if (m+A->rmap->n != nrow) {
203: a->wrapper->copy_submatrix(nrow,std::distance(it+A->rmap->n,idxr+nrow),idxr,idxc+m+A->rmap->n,ptr+(m+A->rmap->n)*nrow); /* vertical block E from above */
204: /* entry-wise assembly may be costly, so transpose already-computed entries when possible */
205: if (A->symmetric || A->hermitian) {
206: MatCreateDense(PETSC_COMM_SELF,A->rmap->n,nrow-(m+A->rmap->n),A->rmap->n,nrow-(m+A->rmap->n),ptr+(m+A->rmap->n)*nrow+m,&B);
207: MatDenseSetLDA(B,nrow);
208: MatCreateDense(PETSC_COMM_SELF,nrow-(m+A->rmap->n),A->rmap->n,nrow-(m+A->rmap->n),A->rmap->n,ptr+m*nrow+m+A->rmap->n,&BT);
209: MatDenseSetLDA(BT,nrow);
210: if (A->hermitian && PetscDefined(USE_COMPLEX)) {
211: MatHermitianTranspose(B,MAT_REUSE_MATRIX,&BT);
212: } else {
213: MatTranspose(B,MAT_REUSE_MATRIX,&BT);
214: }
215: MatDestroy(&B);
216: MatDestroy(&BT);
217: } else {
218: for (PetscInt k=0; k<A->rmap->n; ++k) { /* block F from above */
219: a->wrapper->copy_submatrix(std::distance(it+A->rmap->n,idxr+nrow),1,it+A->rmap->n,idxc+m+k,ptr+(m+k)*nrow+m+A->rmap->n);
220: }
221: }
222: }
223: } /* complete local diagonal block not in IS */
224: } else flg = PETSC_FALSE; /* IS not long enough to store the local diagonal block */
225: } else flg = PETSC_FALSE; /* rmap->rstart not in IS */
226: } /* unsorted IS */
227: }
228: } else flg = PETSC_FALSE; /* different row and column IS */
229: if (!flg) a->wrapper->copy_submatrix(nrow,m,idxr,idxc,ptr); /* reassemble everything */
230: ISRestoreIndices(irow[i],&idxr);
231: ISRestoreIndices(icol[i],&idxc);
232: MatDenseRestoreArrayWrite((*submat)[i],&ptr);
233: MatAssemblyBegin((*submat)[i],MAT_FINAL_ASSEMBLY);
234: MatAssemblyEnd((*submat)[i],MAT_FINAL_ASSEMBLY);
235: MatScale((*submat)[i],a->s);
236: }
237: return(0);
238: }
240: static PetscErrorCode MatDestroy_Htool(Mat A)
241: {
242: Mat_Htool *a = (Mat_Htool*)A->data;
243: PetscContainer container;
244: MatHtoolKernelTranspose *kernelt;
245: PetscErrorCode ierr;
248: PetscObjectChangeTypeName((PetscObject)A,NULL);
249: PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_htool_seqdense_C",NULL);
250: PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_htool_mpidense_C",NULL);
251: PetscObjectComposeFunction((PetscObject)A,"MatConvert_htool_seqdense_C",NULL);
252: PetscObjectComposeFunction((PetscObject)A,"MatConvert_htool_mpidense_C",NULL);
253: PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetHierarchicalMat_C",NULL);
254: PetscObjectComposeFunction((PetscObject)A,"MatHtoolSetKernel_C",NULL);
255: PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetPermutationSource_C",NULL);
256: PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetPermutationTarget_C",NULL);
257: PetscObjectComposeFunction((PetscObject)A,"MatHtoolUsePermutation_C",NULL);
258: PetscObjectQuery((PetscObject)A,"KernelTranspose",(PetscObject*)&container);
259: if (container) { /* created in MatTranspose_Htool() */
260: PetscContainerGetPointer(container,(void**)&kernelt);
261: MatDestroy(&kernelt->A);
262: PetscFree(kernelt);
263: PetscContainerDestroy(&container);
264: PetscObjectCompose((PetscObject)A,"KernelTranspose",NULL);
265: }
266: if (a->gcoords_source != a->gcoords_target) {
267: PetscFree(a->gcoords_source);
268: }
269: PetscFree(a->gcoords_target);
270: PetscFree2(a->work_source,a->work_target);
271: delete a->wrapper;
272: delete a->hmatrix;
273: PetscFree(A->data);
274: return(0);
275: }
277: static PetscErrorCode MatView_Htool(Mat A,PetscViewer pv)
278: {
279: Mat_Htool *a = (Mat_Htool*)A->data;
280: PetscBool flg;
284: PetscObjectTypeCompare((PetscObject)pv,PETSCVIEWERASCII,&flg);
285: if (flg) {
286: PetscViewerASCIIPrintf(pv,"symmetry: %c\n",a->hmatrix->get_symmetry_type());
287: if (PetscAbsScalar(a->s-1.0) > PETSC_MACHINE_EPSILON) {
288: #if defined(PETSC_USE_COMPLEX)
289: PetscViewerASCIIPrintf(pv,"scaling: %g+%gi\n",(double)PetscRealPart(a->s),(double)PetscImaginaryPart(a->s));
290: #else
291: PetscViewerASCIIPrintf(pv,"scaling: %g\n",(double)a->s);
292: #endif
293: }
294: PetscViewerASCIIPrintf(pv,"minimum cluster size: %D\n",a->bs[0]);
295: PetscViewerASCIIPrintf(pv,"maximum block size: %D\n",a->bs[1]);
296: PetscViewerASCIIPrintf(pv,"epsilon: %g\n",(double)a->epsilon);
297: PetscViewerASCIIPrintf(pv,"eta: %g\n",(double)a->eta);
298: PetscViewerASCIIPrintf(pv,"minimum target depth: %D\n",a->depth[0]);
299: PetscViewerASCIIPrintf(pv,"minimum source depth: %D\n",a->depth[1]);
300: PetscViewerASCIIPrintf(pv,"compressor: %s\n",MatHtoolCompressorTypes[a->compressor]);
301: PetscViewerASCIIPrintf(pv,"clustering: %s\n",MatHtoolClusteringTypes[a->clustering]);
302: PetscViewerASCIIPrintf(pv,"compression ratio: %s\n",a->hmatrix->get_infos("Compression_ratio").c_str());
303: PetscViewerASCIIPrintf(pv,"space saving: %s\n",a->hmatrix->get_infos("Space_saving").c_str());
304: PetscViewerASCIIPrintf(pv,"number of dense (resp. low rank) matrices: %s (resp. %s)\n",a->hmatrix->get_infos("Number_of_dmat").c_str(),a->hmatrix->get_infos("Number_of_lrmat").c_str());
305: PetscViewerASCIIPrintf(pv,"(minimum, mean, maximum) dense block sizes: (%s, %s, %s)\n",a->hmatrix->get_infos("Dense_block_size_min").c_str(),a->hmatrix->get_infos("Dense_block_size_mean").c_str(),a->hmatrix->get_infos("Dense_block_size_max").c_str());
306: PetscViewerASCIIPrintf(pv,"(minimum, mean, maximum) low rank block sizes: (%s, %s, %s)\n",a->hmatrix->get_infos("Low_rank_block_size_min").c_str(),a->hmatrix->get_infos("Low_rank_block_size_mean").c_str(),a->hmatrix->get_infos("Low_rank_block_size_max").c_str());
307: PetscViewerASCIIPrintf(pv,"(minimum, mean, maximum) ranks: (%s, %s, %s)\n",a->hmatrix->get_infos("Rank_min").c_str(),a->hmatrix->get_infos("Rank_mean").c_str(),a->hmatrix->get_infos("Rank_max").c_str());
308: }
309: return(0);
310: }
312: static PetscErrorCode MatScale_Htool(Mat A,PetscScalar s)
313: {
314: Mat_Htool *a = (Mat_Htool*)A->data;
317: a->s *= s;
318: return(0);
319: }
321: /* naive implementation of MatGetRow() needed for MatConvert_Nest_AIJ() */
322: static PetscErrorCode MatGetRow_Htool(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
323: {
324: Mat_Htool *a = (Mat_Htool*)A->data;
325: PetscInt *idxc;
326: PetscBLASInt one = 1,bn;
330: if (nz) *nz = A->cmap->N;
331: if (idx || v) { /* even if !idx, need to set idxc for htool::copy_submatrix() */
332: PetscMalloc1(A->cmap->N,&idxc);
333: for (PetscInt i=0; i<A->cmap->N; ++i) idxc[i] = i;
334: }
335: if (idx) *idx = idxc;
336: if (v) {
337: PetscMalloc1(A->cmap->N,v);
338: if (a->wrapper) a->wrapper->copy_submatrix(1,A->cmap->N,&row,idxc,*v);
339: else reinterpret_cast<htool::VirtualGenerator<PetscScalar>*>(a->kernelctx)->copy_submatrix(1,A->cmap->N,&row,idxc,*v);
340: PetscBLASIntCast(A->cmap->N,&bn);
341: PetscStackCallBLAS("BLASscal",BLASscal_(&bn,&a->s,*v,&one));
342: }
343: if (!idx) {
344: PetscFree(idxc);
345: }
346: return(0);
347: }
349: static PetscErrorCode MatRestoreRow_Htool(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
350: {
354: if (nz) *nz = 0;
355: if (idx) {
356: PetscFree(*idx);
357: }
358: if (v) {
359: PetscFree(*v);
360: }
361: return(0);
362: }
364: static PetscErrorCode MatSetFromOptions_Htool(PetscOptionItems *PetscOptionsObject,Mat A)
365: {
366: Mat_Htool *a = (Mat_Htool*)A->data;
367: PetscInt n;
368: PetscBool flg;
372: PetscOptionsHead(PetscOptionsObject,"Htool options");
373: PetscOptionsInt("-mat_htool_min_cluster_size","Minimal leaf size in cluster tree",NULL,a->bs[0],a->bs,NULL);
374: PetscOptionsInt("-mat_htool_max_block_size","Maximal number of coefficients in a dense block",NULL,a->bs[1],a->bs + 1,NULL);
375: PetscOptionsReal("-mat_htool_epsilon","Relative error in Frobenius norm when approximating a block",NULL,a->epsilon,&a->epsilon,NULL);
376: PetscOptionsReal("-mat_htool_eta","Admissibility condition tolerance",NULL,a->eta,&a->eta,NULL);
377: PetscOptionsInt("-mat_htool_min_target_depth","Minimal cluster tree depth associated with the rows",NULL,a->depth[0],a->depth,NULL);
378: PetscOptionsInt("-mat_htool_min_source_depth","Minimal cluster tree depth associated with the columns",NULL,a->depth[1],a->depth + 1,NULL);
379: n = 0;
380: PetscOptionsEList("-mat_htool_compressor","Type of compression","MatHtoolCompressorType",MatHtoolCompressorTypes,ALEN(MatHtoolCompressorTypes),MatHtoolCompressorTypes[MAT_HTOOL_COMPRESSOR_SYMPARTIAL_ACA],&n,&flg);
381: if (flg) a->compressor = MatHtoolCompressorType(n);
382: n = 0;
383: PetscOptionsEList("-mat_htool_clustering","Type of clustering","MatHtoolClusteringType",MatHtoolClusteringTypes,ALEN(MatHtoolClusteringTypes),MatHtoolClusteringTypes[MAT_HTOOL_CLUSTERING_PCA_REGULAR],&n,&flg);
384: if (flg) a->clustering = MatHtoolClusteringType(n);
385: PetscOptionsTail();
386: return(0);
387: }
389: static PetscErrorCode MatAssemblyEnd_Htool(Mat A,MatAssemblyType type)
390: {
391: Mat_Htool *a = (Mat_Htool*)A->data;
392: const PetscInt *ranges;
393: PetscInt *offset;
394: PetscMPIInt size;
395: char S = PetscDefined(USE_COMPLEX) && A->hermitian ? 'H' : (A->symmetric ? 'S' : 'N'),uplo = S == 'N' ? 'N' : 'U';
396: htool::VirtualGenerator<PetscScalar> *generator = nullptr;
397: std::shared_ptr<htool::VirtualCluster> t,s = nullptr;
398: std::shared_ptr<htool::VirtualLowRankGenerator<PetscScalar>> compressor = nullptr;
399: PetscErrorCode ierr;
402: PetscCitationsRegister(HtoolCitation,&HtoolCite);
403: delete a->wrapper;
404: delete a->hmatrix;
405: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
406: PetscMalloc1(2*size,&offset);
407: MatGetOwnershipRanges(A,&ranges);
408: for (PetscInt i=0; i<size; ++i) {
409: offset[2*i] = ranges[i];
410: offset[2*i+1] = ranges[i+1] - ranges[i];
411: }
412: switch (a->clustering) {
413: case MAT_HTOOL_CLUSTERING_PCA_GEOMETRIC:
414: t = std::make_shared<htool::Cluster<htool::PCA<htool::SplittingTypes::GeometricSplitting>>>(a->dim);
415: break;
416: case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_GEOMETRIC:
417: t = std::make_shared<htool::Cluster<htool::BoundingBox1<htool::SplittingTypes::GeometricSplitting>>>(a->dim);
418: break;
419: case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_REGULAR:
420: t = std::make_shared<htool::Cluster<htool::BoundingBox1<htool::SplittingTypes::RegularSplitting>>>(a->dim);
421: break;
422: default:
423: t = std::make_shared<htool::Cluster<htool::PCA<htool::SplittingTypes::RegularSplitting>>>(a->dim);
424: }
425: t->set_minclustersize(a->bs[0]);
426: t->build(A->rmap->N,a->gcoords_target,offset);
427: if (a->kernel) a->wrapper = new WrapperHtool(A->rmap->N,A->cmap->N,a->dim,a->kernel,a->kernelctx);
428: else {
429: a->wrapper = NULL;
430: generator = reinterpret_cast<htool::VirtualGenerator<PetscScalar>*>(a->kernelctx);
431: }
432: if (a->gcoords_target != a->gcoords_source) {
433: MatGetOwnershipRangesColumn(A,&ranges);
434: for (PetscInt i=0; i<size; ++i) {
435: offset[2*i] = ranges[i];
436: offset[2*i+1] = ranges[i+1] - ranges[i];
437: }
438: switch (a->clustering) {
439: case MAT_HTOOL_CLUSTERING_PCA_GEOMETRIC:
440: s = std::make_shared<htool::Cluster<htool::PCA<htool::SplittingTypes::GeometricSplitting>>>(a->dim);
441: break;
442: case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_GEOMETRIC:
443: s = std::make_shared<htool::Cluster<htool::BoundingBox1<htool::SplittingTypes::GeometricSplitting>>>(a->dim);
444: break;
445: case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_REGULAR:
446: s = std::make_shared<htool::Cluster<htool::BoundingBox1<htool::SplittingTypes::RegularSplitting>>>(a->dim);
447: break;
448: default:
449: s = std::make_shared<htool::Cluster<htool::PCA<htool::SplittingTypes::RegularSplitting>>>(a->dim);
450: }
451: s->set_minclustersize(a->bs[0]);
452: s->build(A->cmap->N,a->gcoords_source,offset);
453: S = uplo = 'N';
454: }
455: PetscFree(offset);
456: switch (a->compressor) {
457: case MAT_HTOOL_COMPRESSOR_FULL_ACA:
458: compressor = std::make_shared<htool::fullACA<PetscScalar>>();
459: break;
460: case MAT_HTOOL_COMPRESSOR_SVD:
461: compressor = std::make_shared<htool::SVD<PetscScalar>>();
462: break;
463: default:
464: compressor = std::make_shared<htool::sympartialACA<PetscScalar>>();
465: }
466: a->hmatrix = dynamic_cast<htool::VirtualHMatrix<PetscScalar>*>(new htool::HMatrix<PetscScalar>(t,s ? s : t,a->epsilon,a->eta,S,uplo));
467: a->hmatrix->set_compression(compressor);
468: a->hmatrix->set_maxblocksize(a->bs[1]);
469: a->hmatrix->set_mintargetdepth(a->depth[0]);
470: a->hmatrix->set_minsourcedepth(a->depth[1]);
471: if (s) a->hmatrix->build(a->wrapper ? *a->wrapper : *generator,a->gcoords_target,a->gcoords_source);
472: else a->hmatrix->build(a->wrapper ? *a->wrapper : *generator,a->gcoords_target);
473: return(0);
474: }
476: static PetscErrorCode MatProductNumeric_Htool(Mat C)
477: {
478: Mat_Product *product = C->product;
479: Mat_Htool *a = (Mat_Htool*)product->A->data;
480: const PetscScalar *in;
481: PetscScalar *out;
482: PetscInt lda;
483: PetscErrorCode ierr;
486: MatCheckProduct(C,1);
487: switch (product->type) {
488: case MATPRODUCT_AB:
489: PetscInt N;
490: MatGetSize(C,NULL,&N);
491: MatDenseGetLDA(C,&lda);
492: if (lda != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported leading dimension (%D != %D)",lda,C->rmap->n);
493: MatDenseGetArrayRead(product->B,&in);
494: MatDenseGetArrayWrite(C,&out);
495: a->hmatrix->mvprod_local_to_local(in,out,N);
496: MatDenseRestoreArrayWrite(C,&out);
497: MatDenseRestoreArrayRead(product->B,&in);
498: MatScale(C,a->s);
499: break;
500: default:
501: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatProductType %s is not supported",MatProductTypes[product->type]);
502: }
503: return(0);
504: }
506: static PetscErrorCode MatProductSymbolic_Htool(Mat C)
507: {
508: Mat_Product *product = C->product;
509: Mat A,B;
510: PetscBool flg;
514: MatCheckProduct(C,1);
515: A = product->A;
516: B = product->B;
517: PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,"");
518: if (!flg) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MatProduct_AB not supported for %s",((PetscObject)product->B)->type_name);
519: switch (product->type) {
520: case MATPRODUCT_AB:
521: if (C->rmap->n == PETSC_DECIDE || C->cmap->n == PETSC_DECIDE || C->rmap->N == PETSC_DECIDE || C->cmap->N == PETSC_DECIDE) {
522: MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);
523: }
524: MatSetType(C,MATDENSE);
525: MatSetUp(C);
526: MatSetOption(C,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
527: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
528: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
529: break;
530: default:
531: SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"ProductType %s is not supported",MatProductTypes[product->type]);
532: }
533: C->ops->productsymbolic = NULL;
534: C->ops->productnumeric = MatProductNumeric_Htool;
535: return(0);
536: }
538: static PetscErrorCode MatProductSetFromOptions_Htool(Mat C)
539: {
541: MatCheckProduct(C,1);
542: if (C->product->type == MATPRODUCT_AB) C->ops->productsymbolic = MatProductSymbolic_Htool;
543: return(0);
544: }
546: static PetscErrorCode MatHtoolGetHierarchicalMat_Htool(Mat A,const htool::VirtualHMatrix<PetscScalar> **hmatrix)
547: {
548: Mat_Htool *a = (Mat_Htool*)A->data;
551: *hmatrix = a->hmatrix;
552: return(0);
553: }
555: /*@C
556: MatHtoolGetHierarchicalMat - Retrieves the opaque pointer to a Htool virtual matrix stored in a MATHTOOL.
558: Input Parameter:
559: . A - hierarchical matrix
561: Output Parameter:
562: . hmatrix - opaque pointer to a Htool virtual matrix
564: Level: advanced
566: .seealso: MATHTOOL
567: @*/
568: PETSC_EXTERN PetscErrorCode MatHtoolGetHierarchicalMat(Mat A,const htool::VirtualHMatrix<PetscScalar> **hmatrix)
569: {
575: PetscTryMethod(A,"MatHtoolGetHierarchicalMat_C",(Mat,const htool::VirtualHMatrix<PetscScalar>**),(A,hmatrix));
576: return(0);
577: }
579: static PetscErrorCode MatHtoolSetKernel_Htool(Mat A,MatHtoolKernel kernel,void *kernelctx)
580: {
581: Mat_Htool *a = (Mat_Htool*)A->data;
584: a->kernel = kernel;
585: a->kernelctx = kernelctx;
586: delete a->wrapper;
587: if (a->kernel) a->wrapper = new WrapperHtool(A->rmap->N,A->cmap->N,a->dim,a->kernel,a->kernelctx);
588: return(0);
589: }
591: /*@C
592: MatHtoolSetKernel - Sets the kernel and context used for the assembly of a MATHTOOL.
594: Input Parameters:
595: + A - hierarchical matrix
596: . kernel - computational kernel (or NULL)
597: - kernelctx - kernel context (if kernel is NULL, the pointer must be of type htool::VirtualGenerator<PetscScalar>*)
599: Level: advanced
601: .seealso: MATHTOOL, MatCreateHtoolFromKernel()
602: @*/
603: PETSC_EXTERN PetscErrorCode MatHtoolSetKernel(Mat A,MatHtoolKernel kernel,void *kernelctx)
604: {
611: PetscTryMethod(A,"MatHtoolSetKernel_C",(Mat,MatHtoolKernel,void*),(A,kernel,kernelctx));
612: return(0);
613: }
615: static PetscErrorCode MatHtoolGetPermutationSource_Htool(Mat A,IS* is)
616: {
617: Mat_Htool *a = (Mat_Htool*)A->data;
618: std::vector<PetscInt> source;
619: PetscErrorCode ierr;
622: source = a->hmatrix->get_local_perm_source();
623: ISCreateGeneral(PetscObjectComm((PetscObject)A),source.size(),source.data(),PETSC_COPY_VALUES,is);
624: ISSetPermutation(*is);
625: return(0);
626: }
628: /*@C
629: MatHtoolGetPermutationSource - Gets the permutation associated to the source cluster.
631: Input Parameter:
632: . A - hierarchical matrix
634: Output Parameter:
635: . is - permutation
637: Level: advanced
639: .seealso: MATHTOOL, MatHtoolGetPermutationTarget(), MatHtoolUsePermutation()
640: @*/
641: PETSC_EXTERN PetscErrorCode MatHtoolGetPermutationSource(Mat A,IS* is)
642: {
648: PetscTryMethod(A,"MatHtoolGetPermutationSource_C",(Mat,IS*),(A,is));
649: return(0);
650: }
652: static PetscErrorCode MatHtoolGetPermutationTarget_Htool(Mat A,IS* is)
653: {
654: Mat_Htool *a = (Mat_Htool*)A->data;
655: std::vector<PetscInt> target;
656: PetscErrorCode ierr;
659: target = a->hmatrix->get_local_perm_target();
660: ISCreateGeneral(PetscObjectComm((PetscObject)A),target.size(),target.data(),PETSC_COPY_VALUES,is);
661: ISSetPermutation(*is);
662: return(0);
663: }
665: /*@C
666: MatHtoolGetPermutationTarget - Gets the permutation associated to the target cluster.
668: Input Parameter:
669: . A - hierarchical matrix
671: Output Parameter:
672: . is - permutation
674: Level: advanced
676: .seealso: MATHTOOL, MatHtoolGetPermutationSource(), MatHtoolUsePermutation()
677: @*/
678: PETSC_EXTERN PetscErrorCode MatHtoolGetPermutationTarget(Mat A,IS* is)
679: {
685: PetscTryMethod(A,"MatHtoolGetPermutationTarget_C",(Mat,IS*),(A,is));
686: return(0);
687: }
689: static PetscErrorCode MatHtoolUsePermutation_Htool(Mat A,PetscBool use)
690: {
691: Mat_Htool *a = (Mat_Htool*)A->data;
694: a->hmatrix->set_use_permutation(use);
695: return(0);
696: }
698: /*@C
699: MatHtoolUsePermutation - Sets whether MATHTOOL should permute input (resp. output) vectors following its internal source (resp. target) permutation.
701: Input Parameters:
702: + A - hierarchical matrix
703: - use - Boolean value
705: Level: advanced
707: .seealso: MATHTOOL, MatHtoolGetPermutationSource(), MatHtoolGetPermutationTarget()
708: @*/
709: PETSC_EXTERN PetscErrorCode MatHtoolUsePermutation(Mat A,PetscBool use)
710: {
716: PetscTryMethod(A,"MatHtoolUsePermutation_C",(Mat,PetscBool),(A,use));
717: return(0);
718: }
720: static PetscErrorCode MatConvert_Htool_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
721: {
722: Mat C;
723: Mat_Htool *a = (Mat_Htool*)A->data;
724: PetscInt lda;
725: PetscScalar *array;
729: if (reuse == MAT_REUSE_MATRIX) {
730: C = *B;
731: if (C->rmap->n != A->rmap->n || C->cmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible dimensions");
732: MatDenseGetLDA(C,&lda);
733: if (lda != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported leading dimension (%D != %D)",lda,C->rmap->n);
734: } else {
735: MatCreate(PetscObjectComm((PetscObject)A),&C);
736: MatSetSizes(C,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
737: MatSetType(C,MATDENSE);
738: MatSetUp(C);
739: MatSetOption(C,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
740: }
741: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
742: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
743: MatDenseGetArrayWrite(C,&array);
744: a->hmatrix->copy_local_dense_perm(array);
745: MatDenseRestoreArrayWrite(C,&array);
746: MatScale(C,a->s);
747: if (reuse == MAT_INPLACE_MATRIX) {
748: MatHeaderReplace(A,&C);
749: } else *B = C;
750: return(0);
751: }
753: static PetscErrorCode GenEntriesTranspose(PetscInt sdim,PetscInt M,PetscInt N,const PetscInt *rows,const PetscInt *cols,PetscScalar *ptr,void *ctx)
754: {
755: MatHtoolKernelTranspose *generator = (MatHtoolKernelTranspose*)ctx;
756: PetscScalar *tmp;
757: PetscErrorCode ierr;
760: generator->kernel(sdim,N,M,cols,rows,ptr,generator->kernelctx);
761: PetscMalloc1(M*N,&tmp);
762: PetscArraycpy(tmp,ptr,M*N);
763: for (PetscInt i=0; i<M; ++i) {
764: for (PetscInt j=0; j<N; ++j) ptr[i+j*M] = tmp[j+i*N];
765: }
766: PetscFree(tmp);
767: return(0);
768: }
770: /* naive implementation which keeps a reference to the original Mat */
771: static PetscErrorCode MatTranspose_Htool(Mat A,MatReuse reuse,Mat *B)
772: {
773: Mat C;
774: Mat_Htool *a = (Mat_Htool*)A->data,*c;
775: PetscInt M = A->rmap->N,N = A->cmap->N,m = A->rmap->n,n = A->cmap->n;
776: PetscContainer container;
777: MatHtoolKernelTranspose *kernelt;
778: PetscErrorCode ierr;
781: if (reuse == MAT_INPLACE_MATRIX) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatTranspose() with MAT_INPLACE_MATRIX not supported");
782: if (reuse == MAT_INITIAL_MATRIX) {
783: MatCreate(PetscObjectComm((PetscObject)A),&C);
784: MatSetSizes(C,n,m,N,M);
785: MatSetType(C,((PetscObject)A)->type_name);
786: MatSetUp(C);
787: PetscContainerCreate(PetscObjectComm((PetscObject)C),&container);
788: PetscNew(&kernelt);
789: PetscContainerSetPointer(container,kernelt);
790: PetscObjectCompose((PetscObject)C,"KernelTranspose",(PetscObject)container);
791: } else {
792: C = *B;
793: PetscObjectQuery((PetscObject)C,"KernelTranspose",(PetscObject*)&container);
794: if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatTranspose() with MAT_INITIAL_MATRIX first");
795: PetscContainerGetPointer(container,(void**)&kernelt);
796: }
797: c = (Mat_Htool*)C->data;
798: c->dim = a->dim;
799: c->s = a->s;
800: c->kernel = GenEntriesTranspose;
801: if (kernelt->A != A) {
802: MatDestroy(&kernelt->A);
803: kernelt->A = A;
804: PetscObjectReference((PetscObject)A);
805: }
806: kernelt->kernel = a->kernel;
807: kernelt->kernelctx = a->kernelctx;
808: c->kernelctx = kernelt;
809: if (reuse == MAT_INITIAL_MATRIX) {
810: PetscMalloc1(N*c->dim,&c->gcoords_target);
811: PetscArraycpy(c->gcoords_target,a->gcoords_source,N*c->dim);
812: if (a->gcoords_target != a->gcoords_source) {
813: PetscMalloc1(M*c->dim,&c->gcoords_source);
814: PetscArraycpy(c->gcoords_source,a->gcoords_target,M*c->dim);
815: } else c->gcoords_source = c->gcoords_target;
816: PetscCalloc2(M,&c->work_source,N,&c->work_target);
817: }
818: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
819: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
820: if (reuse == MAT_INITIAL_MATRIX) *B = C;
821: return(0);
822: }
824: /*@C
825: MatCreateHtoolFromKernel - Creates a MATHTOOL from a user-supplied kernel.
827: Input Parameters:
828: + comm - MPI communicator
829: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
830: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
831: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
832: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
833: . spacedim - dimension of the space coordinates
834: . coords_target - coordinates of the target
835: . coords_source - coordinates of the source
836: . kernel - computational kernel (or NULL)
837: - kernelctx - kernel context (if kernel is NULL, the pointer must be of type htool::VirtualGenerator<PetscScalar>*)
839: Output Parameter:
840: . B - matrix
842: Options Database Keys:
843: + -mat_htool_min_cluster_size <PetscInt> - minimal leaf size in cluster tree
844: . -mat_htool_max_block_size <PetscInt> - maximal number of coefficients in a dense block
845: . -mat_htool_epsilon <PetscReal> - relative error in Frobenius norm when approximating a block
846: . -mat_htool_eta <PetscReal> - admissibility condition tolerance
847: . -mat_htool_min_target_depth <PetscInt> - minimal cluster tree depth associated with the rows
848: . -mat_htool_min_source_depth <PetscInt> - minimal cluster tree depth associated with the columns
849: . -mat_htool_compressor <sympartialACA, fullACA, SVD> - type of compression
850: - -mat_htool_clustering <PCARegular, PCAGeometric, BounbingBox1Regular, BoundingBox1Geometric> - type of clustering
852: Level: intermediate
854: .seealso: MatCreate(), MATHTOOL, PCSetCoordinates(), MatHtoolSetKernel(), MatHtoolCompressorType, MATH2OPUS, MatCreateH2OpusFromKernel()
855: @*/
856: PetscErrorCode MatCreateHtoolFromKernel(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt spacedim,const PetscReal coords_target[],const PetscReal coords_source[],MatHtoolKernel kernel,void *kernelctx,Mat *B)
857: {
858: Mat A;
859: Mat_Htool *a;
863: MatCreate(comm,&A);
869: MatSetSizes(A,m,n,M,N);
870: MatSetType(A,MATHTOOL);
871: MatSetUp(A);
872: a = (Mat_Htool*)A->data;
873: a->dim = spacedim;
874: a->s = 1.0;
875: a->kernel = kernel;
876: a->kernelctx = kernelctx;
877: PetscCalloc1(A->rmap->N*spacedim,&a->gcoords_target);
878: PetscArraycpy(a->gcoords_target+A->rmap->rstart*spacedim,coords_target,A->rmap->n*spacedim);
879: MPIU_Allreduce(MPI_IN_PLACE,a->gcoords_target,A->rmap->N*spacedim,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)A)); /* global target coordinates */
880: if (coords_target != coords_source) {
881: PetscCalloc1(A->cmap->N*spacedim,&a->gcoords_source);
882: PetscArraycpy(a->gcoords_source+A->cmap->rstart*spacedim,coords_source,A->cmap->n*spacedim);
883: MPIU_Allreduce(MPI_IN_PLACE,a->gcoords_source,A->cmap->N*spacedim,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)A)); /* global source coordinates */
884: } else a->gcoords_source = a->gcoords_target;
885: PetscCalloc2(A->cmap->N,&a->work_source,A->rmap->N,&a->work_target);
886: *B = A;
887: return(0);
888: }
890: /*MC
891: MATHTOOL = "htool" - A matrix type for hierarchical matrices using the Htool package.
893: Use ./configure --download-htool to install PETSc to use Htool.
895: Options Database Keys:
896: . -mat_type htool - matrix type to "htool" during a call to MatSetFromOptions()
898: Level: beginner
900: .seealso: MATH2OPUS, MATDENSE, MatCreateHtoolFromKernel(), MatHtoolSetKernel()
901: M*/
902: PETSC_EXTERN PetscErrorCode MatCreate_Htool(Mat A)
903: {
904: Mat_Htool *a;
908: PetscNewLog(A,&a);
909: A->data = (void*)a;
910: PetscObjectChangeTypeName((PetscObject)A,MATHTOOL);
911: PetscMemzero(A->ops,sizeof(struct _MatOps));
912: A->ops->getdiagonal = MatGetDiagonal_Htool;
913: A->ops->getdiagonalblock = MatGetDiagonalBlock_Htool;
914: A->ops->mult = MatMult_Htool;
915: A->ops->multadd = MatMultAdd_Htool;
916: A->ops->increaseoverlap = MatIncreaseOverlap_Htool;
917: A->ops->createsubmatrices = MatCreateSubMatrices_Htool;
918: A->ops->transpose = MatTranspose_Htool;
919: A->ops->destroy = MatDestroy_Htool;
920: A->ops->view = MatView_Htool;
921: A->ops->setfromoptions = MatSetFromOptions_Htool;
922: A->ops->scale = MatScale_Htool;
923: A->ops->getrow = MatGetRow_Htool;
924: A->ops->restorerow = MatRestoreRow_Htool;
925: A->ops->assemblyend = MatAssemblyEnd_Htool;
926: a->dim = 0;
927: a->gcoords_target = NULL;
928: a->gcoords_source = NULL;
929: a->s = 1.0;
930: a->bs[0] = 10;
931: a->bs[1] = 1000000;
932: a->epsilon = PetscSqrtReal(PETSC_SMALL);
933: a->eta = 10.0;
934: a->depth[0] = 0;
935: a->depth[1] = 0;
936: a->compressor = MAT_HTOOL_COMPRESSOR_SYMPARTIAL_ACA;
937: PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_htool_seqdense_C",MatProductSetFromOptions_Htool);
938: PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_htool_mpidense_C",MatProductSetFromOptions_Htool);
939: PetscObjectComposeFunction((PetscObject)A,"MatConvert_htool_seqdense_C",MatConvert_Htool_Dense);
940: PetscObjectComposeFunction((PetscObject)A,"MatConvert_htool_mpidense_C",MatConvert_Htool_Dense);
941: PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetHierarchicalMat_C",MatHtoolGetHierarchicalMat_Htool);
942: PetscObjectComposeFunction((PetscObject)A,"MatHtoolSetKernel_C",MatHtoolSetKernel_Htool);
943: PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetPermutationSource_C",MatHtoolGetPermutationSource_Htool);
944: PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetPermutationTarget_C",MatHtoolGetPermutationTarget_Htool);
945: PetscObjectComposeFunction((PetscObject)A,"MatHtoolUsePermutation_C",MatHtoolUsePermutation_Htool);
946: return(0);
947: }