Actual source code: ex192.c
petsc-3.14.6 2021-03-30
2: static char help[] = "Tests MatSolve() and MatMatSolve() with MUMPS or MKL_PARDISO sequential solvers in Schur complement mode.\n\
3: Example: mpiexec -n 1 ./ex192 -f <matrix binary file> -nrhs 4 -symmetric_solve -hermitian_solve -schur_ratio 0.3\n\n";
5: #include <petscmat.h>
7: int main(int argc,char **args)
8: {
9: Mat A,RHS,C,F,X,S;
10: Vec u,x,b;
11: Vec xschur,bschur,uschur;
12: IS is_schur;
14: PetscMPIInt size;
15: PetscInt isolver=0,size_schur,m,n,nfact,nsolve,nrhs;
16: PetscReal norm,tol=PETSC_SQRT_MACHINE_EPSILON;
17: PetscRandom rand;
18: PetscBool data_provided,herm,symm,use_lu,cuda = PETSC_FALSE;
19: PetscReal sratio = 5.1/12.;
20: PetscViewer fd; /* viewer */
21: char solver[256];
22: char file[PETSC_MAX_PATH_LEN]; /* input file name */
24: PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
25: MPI_Comm_size(PETSC_COMM_WORLD, &size);
26: if (size > 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"This is a uniprocessor test");
27: /* Determine which type of solver we want to test for */
28: herm = PETSC_FALSE;
29: symm = PETSC_FALSE;
30: PetscOptionsGetBool(NULL,NULL,"-symmetric_solve",&symm,NULL);
31: PetscOptionsGetBool(NULL,NULL,"-hermitian_solve",&herm,NULL);
32: if (herm) symm = PETSC_TRUE;
33: PetscOptionsGetBool(NULL,NULL,"-cuda_solve",&cuda,NULL);
34: PetscOptionsGetReal(NULL,NULL,"-tol",&tol,NULL);
36: /* Determine file from which we read the matrix A */
37: PetscOptionsGetString(NULL,NULL,"-f",file,sizeof(file),&data_provided);
38: if (!data_provided) { /* get matrices from PETSc distribution */
39: PetscStrncpy(file,"${PETSC_DIR}/share/petsc/datafiles/matrices/",sizeof(file));
40: if (symm) {
41: #if defined (PETSC_USE_COMPLEX)
42: PetscStrlcat(file,"hpd-complex-",sizeof(file));
43: #else
44: PetscStrlcat(file,"spd-real-",sizeof(file));
45: #endif
46: } else {
47: #if defined (PETSC_USE_COMPLEX)
48: PetscStrlcat(file,"nh-complex-",sizeof(file));
49: #else
50: PetscStrlcat(file,"ns-real-",sizeof(file));
51: #endif
52: }
53: #if defined(PETSC_USE_64BIT_INDICES)
54: PetscStrlcat(file,"int64-",sizeof(file));
55: #else
56: PetscStrlcat(file,"int32-",sizeof(file));
57: #endif
58: #if defined (PETSC_USE_REAL_SINGLE)
59: PetscStrlcat(file,"float32",sizeof(file));
60: #else
61: PetscStrlcat(file,"float64",sizeof(file));
62: #endif
63: }
64: /* Load matrix A */
65: PetscViewerBinaryOpen(PETSC_COMM_WORLD,file,FILE_MODE_READ,&fd);
66: MatCreate(PETSC_COMM_WORLD,&A);
67: MatLoad(A,fd);
68: PetscViewerDestroy(&fd);
69: MatGetSize(A,&m,&n);
70: if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ, "This example is not intended for rectangular matrices (%d, %d)", m, n);
72: /* Create dense matrix C and X; C holds true solution with identical colums */
73: nrhs = 2;
74: PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);
75: MatCreate(PETSC_COMM_WORLD,&C);
76: MatSetSizes(C,m,PETSC_DECIDE,PETSC_DECIDE,nrhs);
77: MatSetType(C,MATDENSE);
78: MatSetFromOptions(C);
79: MatSetUp(C);
81: PetscRandomCreate(PETSC_COMM_WORLD,&rand);
82: PetscRandomSetFromOptions(rand);
83: MatSetRandom(C,rand);
84: MatDuplicate(C,MAT_DO_NOT_COPY_VALUES,&X);
86: /* Create vectors */
87: VecCreate(PETSC_COMM_WORLD,&x);
88: VecSetSizes(x,n,PETSC_DECIDE);
89: VecSetFromOptions(x);
90: VecDuplicate(x,&b);
91: VecDuplicate(x,&u); /* save the true solution */
93: PetscOptionsGetInt(NULL,NULL,"-solver",&isolver,NULL);
94: switch (isolver) {
95: #if defined(PETSC_HAVE_MUMPS)
96: case 0:
97: PetscStrcpy(solver,MATSOLVERMUMPS);
98: break;
99: #endif
100: #if defined(PETSC_HAVE_MKL_PARDISO)
101: case 1:
102: PetscStrcpy(solver,MATSOLVERMKL_PARDISO);
103: break;
104: #endif
105: default:
106: PetscStrcpy(solver,MATSOLVERPETSC);
107: break;
108: }
110: #if defined (PETSC_USE_COMPLEX)
111: if (isolver == 0 && symm && !data_provided) { /* MUMPS (5.0.0) does not have support for hermitian matrices, so make them symmetric */
112: PetscScalar im = PetscSqrtScalar((PetscScalar)-1.);
113: PetscScalar val = -1.0;
114: val = val + im;
115: MatSetValue(A,1,0,val,INSERT_VALUES);
116: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
117: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
118: }
119: #endif
121: PetscOptionsGetReal(NULL,NULL,"-schur_ratio",&sratio,NULL);
122: if (sratio < 0. || sratio > 1.) {
123: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ, "Invalid ratio for schur degrees of freedom %f", sratio);
124: }
125: size_schur = (PetscInt)(sratio*m);
127: PetscPrintf(PETSC_COMM_SELF,"Solving with %s: nrhs %D, sym %d, herm %d, size schur %D, size mat %D\n",solver,nrhs,symm,herm,size_schur,m);
129: /* Test LU/Cholesky Factorization */
130: use_lu = PETSC_FALSE;
131: if (!symm) use_lu = PETSC_TRUE;
132: #if defined (PETSC_USE_COMPLEX)
133: if (isolver == 1) use_lu = PETSC_TRUE;
134: #endif
135: if (cuda && symm && !herm) use_lu = PETSC_TRUE;
137: if (herm && !use_lu) { /* test also conversion routines inside the solver packages */
138: MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
139: MatConvert(A,MATSEQSBAIJ,MAT_INPLACE_MATRIX,&A);
140: }
142: if (use_lu) {
143: MatGetFactor(A,solver,MAT_FACTOR_LU,&F);
144: } else {
145: if (herm) {
146: MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
147: MatSetOption(A,MAT_SPD,PETSC_TRUE);
148: } else {
149: MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
150: MatSetOption(A,MAT_SPD,PETSC_FALSE);
151: }
152: MatGetFactor(A,solver,MAT_FACTOR_CHOLESKY,&F);
153: }
154: ISCreateStride(PETSC_COMM_SELF,size_schur,m-size_schur,1,&is_schur);
155: MatFactorSetSchurIS(F,is_schur);
157: ISDestroy(&is_schur);
158: if (use_lu) {
159: MatLUFactorSymbolic(F,A,NULL,NULL,NULL);
160: } else {
161: MatCholeskyFactorSymbolic(F,A,NULL,NULL);
162: }
164: for (nfact = 0; nfact < 3; nfact++) {
165: Mat AD;
167: if (!nfact) {
168: VecSetRandom(x,rand);
169: if (symm && herm) {
170: VecAbs(x);
171: }
172: MatDiagonalSet(A,x,ADD_VALUES);
173: }
174: if (use_lu) {
175: MatLUFactorNumeric(F,A,NULL);
176: } else {
177: MatCholeskyFactorNumeric(F,A,NULL);
178: }
179: if (cuda) {
180: MatFactorGetSchurComplement(F,&S,NULL);
181: MatSetType(S,MATSEQDENSECUDA);
182: MatCreateVecs(S,&xschur,&bschur);
183: MatFactorRestoreSchurComplement(F,&S,MAT_FACTOR_SCHUR_UNFACTORED);
184: }
185: MatFactorCreateSchurComplement(F,&S,NULL);
186: if (!cuda) {
187: MatCreateVecs(S,&xschur,&bschur);
188: }
189: VecDuplicate(xschur,&uschur);
190: if (nfact == 1 && (!cuda || (herm && symm))) {
191: MatFactorInvertSchurComplement(F);
192: }
193: for (nsolve = 0; nsolve < 2; nsolve++) {
194: VecSetRandom(x,rand);
195: VecCopy(x,u);
197: if (nsolve) {
198: MatMult(A,x,b);
199: MatSolve(F,b,x);
200: } else {
201: MatMultTranspose(A,x,b);
202: MatSolveTranspose(F,b,x);
203: }
204: /* Check the error */
205: VecAXPY(u,-1.0,x); /* u <- (-1.0)x + u */
206: VecNorm(u,NORM_2,&norm);
207: if (norm > tol) {
208: PetscReal resi;
209: if (nsolve) {
210: MatMult(A,x,u); /* u = A*x */
211: } else {
212: MatMultTranspose(A,x,u); /* u = A*x */
213: }
214: VecAXPY(u,-1.0,b); /* u <- (-1.0)b + u */
215: VecNorm(u,NORM_2,&resi);
216: if (nsolve) {
217: PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) MatSolve error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);
218: } else {
219: PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) MatSolveTranspose error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);
220: }
221: }
222: VecSetRandom(xschur,rand);
223: VecCopy(xschur,uschur);
224: if (nsolve) {
225: MatMult(S,xschur,bschur);
226: MatFactorSolveSchurComplement(F,bschur,xschur);
227: } else {
228: MatMultTranspose(S,xschur,bschur);
229: MatFactorSolveSchurComplementTranspose(F,bschur,xschur);
230: }
231: /* Check the error */
232: VecAXPY(uschur,-1.0,xschur); /* u <- (-1.0)x + u */
233: VecNorm(uschur,NORM_2,&norm);
234: if (norm > tol) {
235: PetscReal resi;
236: if (nsolve) {
237: MatMult(S,xschur,uschur); /* u = A*x */
238: } else {
239: MatMultTranspose(S,xschur,uschur); /* u = A*x */
240: }
241: VecAXPY(uschur,-1.0,bschur); /* u <- (-1.0)b + u */
242: VecNorm(uschur,NORM_2,&resi);
243: if (nsolve) {
244: PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) MatFactorSolveSchurComplement error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);
245: } else {
246: PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) MatFactorSolveSchurComplementTranspose error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);
247: }
248: }
249: }
250: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&AD);
251: if (!nfact) {
252: MatMatMult(AD,C,MAT_INITIAL_MATRIX,2.0,&RHS);
253: } else {
254: MatMatMult(AD,C,MAT_REUSE_MATRIX,2.0,&RHS);
255: }
256: MatDestroy(&AD);
257: for (nsolve = 0; nsolve < 2; nsolve++) {
258: MatMatSolve(F,RHS,X);
260: /* Check the error */
261: MatAXPY(X,-1.0,C,SAME_NONZERO_PATTERN);
262: MatNorm(X,NORM_FROBENIUS,&norm);
263: if (norm > tol) {
264: PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) MatMatSolve: Norm of error %g\n",nfact,nsolve,norm);
265: }
266: }
267: if (isolver == 0) {
268: Mat spRHS,spRHST,RHST;
270: MatTranspose(RHS,MAT_INITIAL_MATRIX,&RHST);
271: MatConvert(RHST,MATSEQAIJ,MAT_INITIAL_MATRIX,&spRHST);
272: MatCreateTranspose(spRHST,&spRHS);
273: for (nsolve = 0; nsolve < 2; nsolve++) {
274: MatMatSolve(F,spRHS,X);
276: /* Check the error */
277: MatAXPY(X,-1.0,C,SAME_NONZERO_PATTERN);
278: MatNorm(X,NORM_FROBENIUS,&norm);
279: if (norm > tol) {
280: PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) sparse MatMatSolve: Norm of error %g\n",nfact,nsolve,norm);
281: }
282: }
283: MatDestroy(&spRHST);
284: MatDestroy(&spRHS);
285: MatDestroy(&RHST);
286: }
287: MatDestroy(&S);
288: VecDestroy(&xschur);
289: VecDestroy(&bschur);
290: VecDestroy(&uschur);
291: }
292: /* Free data structures */
293: MatDestroy(&A);
294: MatDestroy(&C);
295: MatDestroy(&F);
296: MatDestroy(&X);
297: MatDestroy(&RHS);
298: PetscRandomDestroy(&rand);
299: VecDestroy(&x);
300: VecDestroy(&b);
301: VecDestroy(&u);
302: PetscFinalize();
303: return ierr;
304: }
307: /*TEST
309: testset:
310: requires: mkl_pardiso double !complex !define(PETSC_USE_64BIT_INDICES)
311: args: -solver 1
313: test:
314: suffix: mkl_pardiso
315: test:
316: requires: cuda
317: suffix: mkl_pardiso_cuda
318: args: -cuda_solve
319: output_file: output/ex192_mkl_pardiso.out
320: test:
321: suffix: mkl_pardiso_1
322: args: -symmetric_solve
323: output_file: output/ex192_mkl_pardiso_1.out
324: test:
325: requires: cuda
326: suffix: mkl_pardiso_cuda_1
327: args: -symmetric_solve -cuda_solve
328: output_file: output/ex192_mkl_pardiso_1.out
329: test:
330: suffix: mkl_pardiso_3
331: args: -symmetric_solve -hermitian_solve
332: output_file: output/ex192_mkl_pardiso_3.out
333: test:
334: requires: cuda
335: suffix: mkl_pardiso_cuda_3
336: args: -symmetric_solve -hermitian_solve -cuda_solve
337: output_file: output/ex192_mkl_pardiso_3.out
339: testset:
340: requires: mumps double !complex
341: args: -solver 0
343: test:
344: suffix: mumps
345: test:
346: requires: cuda
347: suffix: mumps_cuda
348: args: -cuda_solve
349: output_file: output/ex192_mumps.out
350: test:
351: suffix: mumps_2
352: args: -symmetric_solve
353: output_file: output/ex192_mumps_2.out
354: test:
355: requires: cuda
356: suffix: mumps_cuda_2
357: args: -symmetric_solve -cuda_solve
358: output_file: output/ex192_mumps_2.out
359: test:
360: suffix: mumps_3
361: args: -symmetric_solve -hermitian_solve
362: output_file: output/ex192_mumps_3.out
363: test:
364: requires: cuda
365: suffix: mumps_cuda_3
366: args: -symmetric_solve -hermitian_solve -cuda_solve
367: output_file: output/ex192_mumps_3.out
369: TEST*/