Actual source code: ex237.c

petsc-3.14.6 2021-03-30
Report Typos and Errors
  1: static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n";

  3: /*
  4:   See the paper below for more information

  6:    "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods",
  7:    P. Jolivet, J. E. Roman, and S. Zampini (2020).
  8: */

 10: #include <petsc.h>

 12: #if defined(PETSC_HAVE_MKL) && defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 13: #include <mkl.h>
 14: #define PetscStackCallMKLSparse(func, args) do {               \
 15:     sparse_status_t __ierr;                                    \
 16:     PetscStackPush(#func);                                     \
 17:     __func args;                                        \
 18:     PetscStackPop;                                             \
 19:     if (__ierr != SPARSE_STATUS_SUCCESS) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \
 20:   } while (0)
 21: #else
 22: #define PetscStackCallMKLSparse(func, args) do {               \
 23:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \
 24:   } while (0)
 25: #endif

 27: int main(int argc, char** argv)
 28: {
 29:   Mat            A, C, D, E;
 30:   PetscInt       nbs = 10, ntype = 10, nN = 8, m, M, trial = 5;
 31:   PetscViewer    viewer;
 32:   PetscInt       bs[10], N[8];
 33:   char           *type[10];
 34:   PetscMPIInt    size;
 35:   PetscBool      flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, trans = PETSC_FALSE, convert = PETSC_FALSE, mkl;
 36:   char           file[PETSC_MAX_PATH_LEN];

 39:   PetscInitialize(&argc, &argv, NULL, help);if (ierr) return ierr;
 40:   if (ierr) return ierr;
 41:   MPI_Comm_size(PETSC_COMM_WORLD, &size);
 42:   if (size != 1) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only");
 43:   PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg);
 44:   if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option");
 45:   PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL);
 46:   PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg);
 47:   if (!flg) {
 48:     nbs = 1;
 49:     bs[0] = 1;
 50:   }
 51:   PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg);
 52:   if (!flg) {
 53:     nN = 8;
 54:     N[0] = 1;  N[1] = 2;  N[2] = 4;  N[3] = 8;
 55:     N[4] = 16; N[5] = 32; N[6] = 64; N[7] = 128;
 56:   }
 57:   PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg);
 58:   if (!flg) {
 59:     ntype = 1;
 60:     PetscStrallocpy(MATSEQAIJ, &type[0]);
 61:   }
 62:   PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL);
 63:   PetscOptionsGetBool(NULL, NULL, "-trans", &trans, NULL);
 64:   PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL);
 65:   for (PetscInt j = 0; j < nbs; ++j) {
 66:     PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer);
 67:     MatCreate(PETSC_COMM_WORLD, &A);
 68:     MatSetFromOptions(A);
 69:     MatLoad(A, viewer);
 70:     PetscViewerDestroy(&viewer);
 71:     PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "");
 72:     if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix");
 73:     MatGetSize(A, &m, &M);
 74:     if (m == M) {
 75:       Mat oA;
 76:       MatTranspose(A, MAT_INITIAL_MATRIX, &oA);
 77:       MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN);
 78:       MatDestroy(&oA);
 79:     }
 80:     MatGetLocalSize(A, &m, NULL);
 81:     MatGetSize(A, &M, NULL);
 82:     if (bs[j] > 1) {
 83:       Mat               T, Tt, B;
 84:       const PetscScalar *ptr;
 85:       PetscScalar       *val, *Aa;
 86:       const PetscInt    *Ai, *Aj;
 87:       PetscInt          An, i, k;
 88:       PetscBool         done;

 90:       MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T);
 91:       MatSetRandom(T, NULL);
 92:       MatTranspose(T, MAT_INITIAL_MATRIX, &Tt);
 93:       MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN);
 94:       MatDestroy(&Tt);
 95:       MatDenseGetArrayRead(T, &ptr);
 96:       MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done);
 97:       if (!done || An != m) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
 98:       MatSeqAIJGetArray(A, &Aa);
 99:       MatCreate(PETSC_COMM_WORLD, &B);
100:       MatSetType(B, MATSEQBAIJ);
101:       MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE);
102:       PetscMalloc1(Ai[An] * bs[j] * bs[j],&val);
103:       for (i = 0; i < Ai[An]; ++i)
104:         for (k = 0; k < bs[j] * bs[j]; ++k)
105:           val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k];
106:       MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE);
107:       MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val);
108:       PetscFree(val);
109:       MatSeqAIJRestoreArray(A, &Aa);
110:       MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done);
111:       MatDenseRestoreArrayRead(T, &ptr);
112:       MatDestroy(&T);
113:       MatDestroy(&A);
114:       A    = B;
115:     }
116:     /* reconvert back to SeqAIJ before converting to the desired type later */
117:     if (!convert) E = A;
118:     MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E);
119:     MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE);
120:     for (PetscInt i = 0; i < ntype; ++i) {
121:       char        *tmp;
122:       PetscInt    *ia_ptr, *ja_ptr, k;
123:       PetscScalar *a_ptr;
124: #if defined(PETSC_HAVE_MKL) && defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
125:       struct matrix_descr descr;
126:       sparse_matrix_t     spr;
127:       descr.type = SPARSE_MATRIX_TYPE_GENERAL;
128:       descr.diag = SPARSE_DIAG_NON_UNIT;
129: #endif
130:       if (convert) {
131:         MatDestroy(&A);
132:       }
133:       PetscStrstr(type[i], "mkl", &tmp);
134:       if (tmp) {
135:         size_t mlen, tlen;
136:         char base[256];

138:         mkl  = PETSC_TRUE;
139:         PetscStrlen(tmp, &mlen);
140:         PetscStrlen(type[i], &tlen);
141:         PetscStrncpy(base, type[i], tlen-mlen + 1);
142:         MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);
143:       } else {
144:         mkl  = PETSC_FALSE;
145:         PetscStrstr(type[i], "maij", &tmp);
146:         if (!tmp) {
147:           MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);
148:         } else {
149:           MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);
150:           maij = PETSC_TRUE;
151:         }
152:       }
153:       PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "");
154:       if (mkl) {
155:         const PetscInt *Ai, *Aj;
156:         PetscInt       An;
157:         PetscBool      done;

159:         PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, "");
160:         if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented");
161:         PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg);
162:         MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done);
163:         if (!done) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
164:         PetscMalloc1(An + 1,&ia_ptr);
165:         PetscMalloc1(Ai[An],&ja_ptr);
166:         if (flg) { /* SeqAIJ */
167:           for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k];
168:           for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k];
169:           MatSeqAIJGetArray(A, &a_ptr);
170:           PetscStackCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
171:         } else {
172:           PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg);
173:           if (flg) {
174:             for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
175:             for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
176:             MatSeqBAIJGetArray(A, &a_ptr);
177:             PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
178:           } else {
179:             PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg);
180:             if (flg) {
181:               for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
182:               for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
183:               MatSeqSBAIJGetArray(A, &a_ptr);
184:               PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
185: #if defined(PETSC_HAVE_MKL) && defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
186:               descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
187:               descr.mode = SPARSE_FILL_MODE_UPPER;
188:               descr.diag = SPARSE_DIAG_NON_UNIT;
189: #endif
190:             }
191:           }
192:         }
193:         PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg);
194:         MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done);
195:       }

197:       MatViewFromOptions(A, NULL, "-A_view");

199:       for (k = 0; k < nN; ++k) {
200:         MatType       Atype, Ctype;
201:         PetscInt      AM, AN, CM, CN, t;
202: #if defined(PETSC_USE_LOG)
203:         PetscLogStage stage, tstage;
204:         char          stage_s[256];
205: #endif

207:         MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C);
208:         MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D);
209:         MatSetRandom(C, NULL);
210:         if (cuda) { /* convert to GPU if needed */
211:           MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C);
212:           MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D);
213:         }
214:         if (mkl) {
215:           if (N[k] > 1) PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial));
216:           else          PetscStackCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial));
217:           PetscStackCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE));
218:           PetscStackCallMKLSparse(mkl_sparse_optimize, (spr));
219:         }
220:         MatGetType(A, &Atype);
221:         MatGetType(C, &Ctype);
222:         MatGetSize(A, &AM, &AN);
223:         MatGetSize(C, &CM, &CN);

225: #if defined(PETSC_USE_LOG)
226:         if (!maij || N[k] > 1) {
227:           PetscSNPrintf(stage_s, sizeof(stage_s), "type_%s-bs_%D-N_%02d", type[i], bs[j], (int)N[k]);
228:           PetscLogStageRegister(stage_s, &stage);
229:         }
230:         if (trans && N[k] > 1) {
231:           PetscSNPrintf(stage_s, sizeof(stage_s), "trans_type_%s-bs_%D-N_%02d", type[i], bs[j], (int)N[k]);
232:           PetscLogStageRegister(stage_s, &tstage);
233:         }
234: #endif
235:         /* A*B */
236:         if (N[k] > 1) {
237:           if (!maij) {
238:             MatProductCreateWithMat(A, C, NULL, D);
239:             MatProductSetType(D, MATPRODUCT_AB);
240:             MatProductSetFromOptions(D);
241:             MatProductSymbolic(D);
242:           }

244:           if (!mkl) {
245:             if (!maij) {
246:               MatProductNumeric(D);
247:               PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %Dx%D and B %s %Dx%D\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN);
248:               PetscLogStagePush(stage);
249:               for (t = 0; t < trial; ++t) {
250:                 MatProductNumeric(D);
251:               }
252:               PetscLogStagePop();
253:             } else {
254:               Mat               E, Ct, Dt;
255:               Vec               cC, cD;
256:               const PetscScalar *c_ptr;
257:               PetscScalar       *d_ptr;
258:               MatCreateMAIJ(A, N[k], &E);
259:               MatDenseGetLocalMatrix(C, &Ct);
260:               MatDenseGetLocalMatrix(D, &Dt);
261:               MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct);
262:               MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt);
263:               MatDenseGetArrayRead(Ct, &c_ptr);
264:               MatDenseGetArrayWrite(Dt, &d_ptr);
265:               VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC);
266:               VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD);
267:               MatMult(E, cC, cD);
268:               PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %Dx%D and B %s %Dx%D\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1);
269:               PetscLogStagePush(stage);
270:               for (t = 0; t < trial; ++t) {
271:                 MatMult(E, cC, cD);
272:               }
273:               PetscLogStagePop();
274:               VecDestroy(&cD);
275:               VecDestroy(&cC);
276:               MatDestroy(&E);
277:               MatDenseRestoreArrayWrite(Dt, &d_ptr);
278:               MatDenseRestoreArrayRead(Ct, &c_ptr);
279:               MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct);
280:               MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt);
281:             }
282:           } else {
283:             const PetscScalar *c_ptr;
284:             PetscScalar       *d_ptr;

286:             MatDenseGetArrayRead(C, &c_ptr);
287:             MatDenseGetArrayWrite(D, &d_ptr);
288:             PetscStackCallMKLSparse(mkl_sparse_d_mm,(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
289:             PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %Dx%D and B %s %Dx%D\n", Atype, AM, AN, Ctype, CM, CN);
290:             PetscLogStagePush(stage);
291:             for (t = 0; t < trial; ++t) {
292:               PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
293:             }
294:             PetscLogStagePop();
295:             MatDenseRestoreArrayWrite(D, &d_ptr);
296:             MatDenseRestoreArrayRead(C, &c_ptr);
297:           }
298:         } else if (maij) {
299:           MatDestroy(&C);
300:           MatDestroy(&D);
301:           continue;
302:         } else if (!mkl) {
303:           Vec cC, cD;

305:           MatDenseGetColumnVecRead(C, 0, &cC);
306:           MatDenseGetColumnVecWrite(D, 0, &cD);
307:           MatMult(A, cC, cD);
308:           PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %Dx%D\n", Atype, AM, AN);
309:           PetscLogStagePush(stage);
310:           for (t = 0; t < trial; ++t) {
311:             MatMult(A, cC, cD);
312:           }
313:           PetscLogStagePop();
314:           MatDenseRestoreColumnVecRead(C, 0, &cC);
315:           MatDenseRestoreColumnVecWrite(D, 0, &cD);
316:         } else {
317:           const PetscScalar *c_ptr;
318:           PetscScalar       *d_ptr;

320:           MatDenseGetArrayRead(C, &c_ptr);
321:           MatDenseGetArrayWrite(D, &d_ptr);
322:           PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %Dx%D\n", Atype, AM, AN);
323:           PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
324:           PetscLogStagePush(stage);
325:           for (t = 0; t < trial; ++t) {
326:             PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
327:           }
328:           PetscLogStagePop();
329:           MatDenseRestoreArrayWrite(D, &d_ptr);
330:           MatDenseRestoreArrayRead(C, &c_ptr);
331:         }

333:         if (check) {
334:           MatMatMultEqual(A, C, D, 10, &flg);
335:           if (!flg) {
336:             MatType Dtype;

338:             MatGetType(D, &Dtype);
339:             PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %D\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]);
340:           }
341:         }

343:         /* MKL implementation seems buggy for ABt */
344:         /* A*Bt */
345:         if (!mkl && trans && N[k] > 1) {
346:           PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "");
347:           if (flg) {
348:             MatTranspose(C, MAT_INPLACE_MATRIX, &C);
349:             MatGetType(C, &Ctype);
350:             if (!mkl) {
351:               MatProductCreateWithMat(A, C, NULL, D);
352:               MatProductSetType(D, MATPRODUCT_ABt);
353:               MatProductSetFromOptions(D);
354:               MatProductSymbolic(D);
355:               MatProductNumeric(D);
356:               PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %Dx%D and Bt %s %Dx%D\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN);
357:               PetscLogStagePush(tstage);
358:               for (t = 0; t < trial; ++t) {
359:                 MatProductNumeric(D);
360:               }
361:               PetscLogStagePop();
362:             } else {
363:               const PetscScalar *c_ptr;
364:               PetscScalar       *d_ptr;

366:               PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial));
367:               PetscStackCallMKLSparse(mkl_sparse_optimize, (spr));
368:               MatDenseGetArrayRead(C, &c_ptr);
369:               MatDenseGetArrayWrite(D, &d_ptr);
370:               PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %Dx%D and B %s %Dx%D\n", Atype, AM, AN, Ctype, CM, CN);
371:               PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
372:               PetscLogStagePush(stage);
373:               for (t = 0; t < trial; ++t) {
374:                 PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
375:               }
376:               PetscLogStagePop();
377:               MatDenseRestoreArrayWrite(D, &d_ptr);
378:               MatDenseRestoreArrayRead(C, &c_ptr);
379:             }
380:           }
381:         }

383:         if (!mkl && trans && N[k] > 1 && flg && check) {
384:           MatMatTransposeMultEqual(A, C, D, 10, &flg);
385:           if (!flg) {
386:             MatType Dtype;
387:             MatGetType(D, &Dtype);
388:             PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %D\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]);
389:           }
390:         }
391:         MatDestroy(&C);
392:         MatDestroy(&D);
393:       }
394:       if (mkl) {
395:         PetscStackCallMKLSparse(mkl_sparse_destroy, (spr));
396:         PetscFree(ia_ptr);
397:         PetscFree(ja_ptr);
398:       }
399:       if (cuda && i != ntype - 1) {
400:         PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n");
401:         break;
402:       }
403:     }
404:     if (E != A) {
405:       MatDestroy(&E);
406:     }
407:     MatDestroy(&A);
408:   }
409:   for (m = 0; m < ntype; ++m) {
410:     PetscFree(type[m]);
411:   }
412:   PetscFinalize();
413:   return 0;
414: }

416: /*TEST
417:    build:
418:      requires: double !complex !define(PETSC_USE_64BIT_INDICES)

420:    testset:
421:      nsize: 1
422:      filter: sed "/Benchmarking/d"
423:      args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -trans -convert_aij {{false true}shared output}
424:      test:
425:        suffix: basic
426:        args: -type aij,sbaij,baij
427:        output_file: output/ex237.out
428:      test:
429:        suffix: maij
430:        args: -type aij,maij
431:        output_file: output/ex237.out
432:      test:
433:        suffix: cuda
434:        requires: cuda
435:        args: -type aij,aijcusparse
436:        output_file: output/ex237.out
437:      test:
438:        suffix: mkl
439:        requires: mkl
440:        args: -type aij,aijmkl,baijmkl,sbaijmkl
441:        output_file: output/ex237.out

443: TEST*/