1c4762a1bSJed Brown static char help[] = "Tests MatMult(), MatMultAdd(), MatMultTranspose().\n\ 25f9962eeSHong Zhang Also MatMultTransposeAdd(), MatScale(), MatGetDiagonal(), MatDiagonalScale(), MatZeroEntries() and MatDuplicate().\n\n"; 3c4762a1bSJed Brown 4c4762a1bSJed Brown #include <petscmat.h> 5c4762a1bSJed Brown 6d71ae5a4SJacob Faibussowitsch int main(int argc, char **args) 7d71ae5a4SJacob Faibussowitsch { 8c4762a1bSJed Brown Mat C; 9c4762a1bSJed Brown Vec s, u, w, x, y, z; 10c4762a1bSJed Brown PetscInt i, j, m = 8, n, rstart, rend, vstart, vend; 11c4762a1bSJed Brown PetscScalar one = 1.0, negone = -1.0, v, alpha = 0.1; 12c4762a1bSJed Brown PetscReal norm, tol = PETSC_SQRT_MACHINE_EPSILON; 13c4762a1bSJed Brown PetscBool flg; 14c4762a1bSJed Brown 15327415f7SBarry Smith PetscFunctionBeginUser; 16*c8025a54SPierre Jolivet PetscCall(PetscInitialize(&argc, &args, NULL, help)); 179566063dSJacob Faibussowitsch PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_ASCII_COMMON)); 189566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL)); 19c4762a1bSJed Brown n = m; 209566063dSJacob Faibussowitsch PetscCall(PetscOptionsHasName(NULL, NULL, "-rectA", &flg)); 21c4762a1bSJed Brown if (flg) n += 2; 229566063dSJacob Faibussowitsch PetscCall(PetscOptionsHasName(NULL, NULL, "-rectB", &flg)); 23c4762a1bSJed Brown if (flg) n -= 2; 24c4762a1bSJed Brown 25c4762a1bSJed Brown /* ---------- Assemble matrix and vectors ----------- */ 26c4762a1bSJed Brown 279566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD, &C)); 289566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, PETSC_DECIDE, PETSC_DECIDE, m, n)); 299566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(C)); 309566063dSJacob Faibussowitsch PetscCall(MatSetUp(C)); 319566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(C, &rstart, &rend)); 329566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_WORLD, &x)); 339566063dSJacob Faibussowitsch PetscCall(VecSetSizes(x, PETSC_DECIDE, m)); 349566063dSJacob Faibussowitsch PetscCall(VecSetFromOptions(x)); 359566063dSJacob Faibussowitsch PetscCall(VecDuplicate(x, &z)); 369566063dSJacob Faibussowitsch PetscCall(VecDuplicate(x, &w)); 379566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_WORLD, &y)); 389566063dSJacob Faibussowitsch PetscCall(VecSetSizes(y, PETSC_DECIDE, n)); 399566063dSJacob Faibussowitsch PetscCall(VecSetFromOptions(y)); 409566063dSJacob Faibussowitsch PetscCall(VecDuplicate(y, &u)); 419566063dSJacob Faibussowitsch PetscCall(VecDuplicate(y, &s)); 429566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(y, &vstart, &vend)); 43c4762a1bSJed Brown 44c4762a1bSJed Brown /* Assembly */ 45c4762a1bSJed Brown for (i = rstart; i < rend; i++) { 46c4762a1bSJed Brown v = 100 * (i + 1); 479566063dSJacob Faibussowitsch PetscCall(VecSetValues(z, 1, &i, &v, INSERT_VALUES)); 48c4762a1bSJed Brown for (j = 0; j < n; j++) { 49c4762a1bSJed Brown v = 10 * (i + 1) + j + 1; 509566063dSJacob Faibussowitsch PetscCall(MatSetValues(C, 1, &i, 1, &j, &v, INSERT_VALUES)); 51c4762a1bSJed Brown } 52c4762a1bSJed Brown } 53c4762a1bSJed Brown 54c4762a1bSJed Brown /* Flush off proc Vec values and do more assembly */ 559566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(z)); 56c4762a1bSJed Brown for (i = vstart; i < vend; i++) { 57c4762a1bSJed Brown v = one * ((PetscReal)i); 589566063dSJacob Faibussowitsch PetscCall(VecSetValues(y, 1, &i, &v, INSERT_VALUES)); 59c4762a1bSJed Brown v = 100.0 * i; 609566063dSJacob Faibussowitsch PetscCall(VecSetValues(u, 1, &i, &v, INSERT_VALUES)); 61c4762a1bSJed Brown } 62c4762a1bSJed Brown 63c4762a1bSJed Brown /* Flush off proc Mat values and do more assembly */ 649566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FLUSH_ASSEMBLY)); 65c4762a1bSJed Brown for (i = rstart; i < rend; i++) { 66c4762a1bSJed Brown for (j = 0; j < n; j++) { 67c4762a1bSJed Brown v = 10 * (i + 1) + j + 1; 689566063dSJacob Faibussowitsch PetscCall(MatSetValues(C, 1, &i, 1, &j, &v, INSERT_VALUES)); 69c4762a1bSJed Brown } 70c4762a1bSJed Brown } 71c4762a1bSJed Brown /* Try overlap Coomunication with the next stage XXXSetValues */ 729566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(z)); 73c4762a1bSJed Brown 749566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FLUSH_ASSEMBLY)); 75c4762a1bSJed Brown CHKMEMQ; 76c4762a1bSJed Brown /* The Assembly for the second Stage */ 779566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 789566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 799566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(y)); 809566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(y)); 819566063dSJacob Faibussowitsch PetscCall(MatScale(C, alpha)); 829566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(u)); 839566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(u)); 849566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMult()\n")); 85c4762a1bSJed Brown CHKMEMQ; 869566063dSJacob Faibussowitsch PetscCall(MatMult(C, y, x)); 87c4762a1bSJed Brown CHKMEMQ; 889566063dSJacob Faibussowitsch PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD)); 899566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMultAdd()\n")); 909566063dSJacob Faibussowitsch PetscCall(MatMultAdd(C, y, z, w)); 919566063dSJacob Faibussowitsch PetscCall(VecAXPY(x, one, z)); 929566063dSJacob Faibussowitsch PetscCall(VecAXPY(x, negone, w)); 939566063dSJacob Faibussowitsch PetscCall(VecNorm(x, NORM_2, &norm)); 9448a46eb9SPierre Jolivet if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error difference = %g\n", (double)norm)); 95c4762a1bSJed Brown 96c4762a1bSJed Brown /* ------- Test MatMultTranspose(), MatMultTransposeAdd() ------- */ 97c4762a1bSJed Brown 98c4762a1bSJed Brown for (i = rstart; i < rend; i++) { 99c4762a1bSJed Brown v = one * ((PetscReal)i); 1009566063dSJacob Faibussowitsch PetscCall(VecSetValues(x, 1, &i, &v, INSERT_VALUES)); 101c4762a1bSJed Brown } 1029566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(x)); 1039566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(x)); 1049566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMultTranspose()\n")); 1059566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(C, x, y)); 1069566063dSJacob Faibussowitsch PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD)); 107c4762a1bSJed Brown 1089566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMultTransposeAdd()\n")); 1099566063dSJacob Faibussowitsch PetscCall(MatMultTransposeAdd(C, x, u, s)); 1109566063dSJacob Faibussowitsch PetscCall(VecAXPY(y, one, u)); 1119566063dSJacob Faibussowitsch PetscCall(VecAXPY(y, negone, s)); 1129566063dSJacob Faibussowitsch PetscCall(VecNorm(y, NORM_2, &norm)); 11348a46eb9SPierre Jolivet if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error difference = %g\n", (double)norm)); 114c4762a1bSJed Brown 115c4762a1bSJed Brown /* -------------------- Test MatGetDiagonal() ------------------ */ 116c4762a1bSJed Brown 1179566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatGetDiagonal(), MatDiagonalScale()\n")); 1189566063dSJacob Faibussowitsch PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD)); 1199566063dSJacob Faibussowitsch PetscCall(VecSet(x, one)); 1209566063dSJacob Faibussowitsch PetscCall(MatGetDiagonal(C, x)); 1219566063dSJacob Faibussowitsch PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD)); 122c4762a1bSJed Brown for (i = vstart; i < vend; i++) { 123c4762a1bSJed Brown v = one * ((PetscReal)(i + 1)); 1249566063dSJacob Faibussowitsch PetscCall(VecSetValues(y, 1, &i, &v, INSERT_VALUES)); 125c4762a1bSJed Brown } 126c4762a1bSJed Brown 127c4762a1bSJed Brown /* -------------------- Test () MatDiagonalScale ------------------ */ 1289566063dSJacob Faibussowitsch PetscCall(PetscOptionsHasName(NULL, NULL, "-test_diagonalscale", &flg)); 129c4762a1bSJed Brown if (flg) { 1309566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(C, x, y)); 1319566063dSJacob Faibussowitsch PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD)); 132c4762a1bSJed Brown } 1335f9962eeSHong Zhang /* -------------------- Test () MatZeroEntries() and MatDuplicate() ------------------ */ 1345f9962eeSHong Zhang PetscCall(PetscOptionsHasName(NULL, NULL, "-test_zeroentries", &flg)); 1355f9962eeSHong Zhang if (flg) { 1365f9962eeSHong Zhang Mat D; 1375f9962eeSHong Zhang PetscCall(MatDuplicate(C, MAT_COPY_VALUES, &D)); 1385f9962eeSHong Zhang PetscCall(MatZeroEntries(D)); 1395f9962eeSHong Zhang PetscCall(MatView(D, PETSC_VIEWER_STDOUT_WORLD)); 1405f9962eeSHong Zhang PetscCall(MatDestroy(&D)); 1415f9962eeSHong Zhang } 142c4762a1bSJed Brown /* Free data structures */ 1439371c9d4SSatish Balay PetscCall(VecDestroy(&u)); 1449371c9d4SSatish Balay PetscCall(VecDestroy(&s)); 1459371c9d4SSatish Balay PetscCall(VecDestroy(&w)); 1469371c9d4SSatish Balay PetscCall(VecDestroy(&x)); 1479371c9d4SSatish Balay PetscCall(VecDestroy(&y)); 1489371c9d4SSatish Balay PetscCall(VecDestroy(&z)); 1499566063dSJacob Faibussowitsch PetscCall(MatDestroy(&C)); 150c4762a1bSJed Brown 1519566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 152b122ec5aSJacob Faibussowitsch return 0; 153c4762a1bSJed Brown } 154c4762a1bSJed Brown 155c4762a1bSJed Brown /*TEST 156c4762a1bSJed Brown 157c4762a1bSJed Brown test: 158c4762a1bSJed Brown suffix: 11_A 159c4762a1bSJed Brown args: -mat_type seqaij -rectA 160c4762a1bSJed Brown filter: grep -v type 161c4762a1bSJed Brown 162c4762a1bSJed Brown test: 163c4762a1bSJed Brown args: -mat_type seqdense -rectA 164c4762a1bSJed Brown suffix: 12_A 165c4762a1bSJed Brown 166c4762a1bSJed Brown test: 167c4762a1bSJed Brown args: -mat_type seqaij -rectB 168c4762a1bSJed Brown suffix: 11_B 169c4762a1bSJed Brown filter: grep -v type 170c4762a1bSJed Brown 171c4762a1bSJed Brown test: 172c4762a1bSJed Brown args: -mat_type seqdense -rectB 173c4762a1bSJed Brown suffix: 12_B 174c4762a1bSJed Brown 175c4762a1bSJed Brown test: 176c4762a1bSJed Brown suffix: 21 177c4762a1bSJed Brown args: -mat_type mpiaij 178c4762a1bSJed Brown filter: grep -v type 179c4762a1bSJed Brown 180c4762a1bSJed Brown test: 181c4762a1bSJed Brown suffix: 22 182c4762a1bSJed Brown args: -mat_type mpidense 183c4762a1bSJed Brown 184c4762a1bSJed Brown test: 185c4762a1bSJed Brown suffix: 23 186c4762a1bSJed Brown nsize: 3 187c4762a1bSJed Brown args: -mat_type mpiaij 188c4762a1bSJed Brown filter: grep -v type 189c4762a1bSJed Brown 190c4762a1bSJed Brown test: 191c4762a1bSJed Brown suffix: 24 192c4762a1bSJed Brown nsize: 3 193c4762a1bSJed Brown args: -mat_type mpidense 194c4762a1bSJed Brown 195c4762a1bSJed Brown test: 196c4762a1bSJed Brown suffix: 2_aijcusparse_1 197c4762a1bSJed Brown args: -mat_type mpiaijcusparse -vec_type cuda 198c4762a1bSJed Brown filter: grep -v type 199c4762a1bSJed Brown output_file: output/ex5_21.out 200c4762a1bSJed Brown requires: cuda 201c4762a1bSJed Brown 202c4762a1bSJed Brown test: 203c4762a1bSJed Brown nsize: 3 204c4762a1bSJed Brown suffix: 2_aijcusparse_2 205c4762a1bSJed Brown filter: grep -v type 206c4762a1bSJed Brown args: -mat_type mpiaijcusparse -vec_type cuda 207bd46da1dSJunchao Zhang args: -sf_type {{basic neighbor}} 208c4762a1bSJed Brown output_file: output/ex5_23.out 209c4762a1bSJed Brown requires: cuda 210c4762a1bSJed Brown 211c4762a1bSJed Brown test: 212c4762a1bSJed Brown nsize: 3 213c4762a1bSJed Brown suffix: 2_aijcusparse_3 214c4762a1bSJed Brown filter: grep -v type 215c4762a1bSJed Brown args: -mat_type mpiaijcusparse -vec_type cuda 216c20d7725SJed Brown args: -sf_type {{basic neighbor}} 217c4762a1bSJed Brown output_file: output/ex5_23.out 218dfd57a17SPierre Jolivet requires: cuda defined(PETSC_HAVE_MPI_GPU_AWARE) 219c4762a1bSJed Brown 220c4762a1bSJed Brown test: 221c4762a1bSJed Brown suffix: 31 222c4762a1bSJed Brown args: -mat_type mpiaij -test_diagonalscale 223c4762a1bSJed Brown filter: grep -v type 224c4762a1bSJed Brown 225c4762a1bSJed Brown test: 226c4762a1bSJed Brown suffix: 32 227c4762a1bSJed Brown args: -mat_type mpibaij -test_diagonalscale 228c4762a1bSJed Brown 229c4762a1bSJed Brown test: 230c4762a1bSJed Brown suffix: 33 231c4762a1bSJed Brown nsize: 3 232c4762a1bSJed Brown args: -mat_type mpiaij -test_diagonalscale 233c4762a1bSJed Brown filter: grep -v type 234c4762a1bSJed Brown 235c4762a1bSJed Brown test: 236c4762a1bSJed Brown suffix: 34 237c4762a1bSJed Brown nsize: 3 238c4762a1bSJed Brown args: -mat_type mpibaij -test_diagonalscale 239c4762a1bSJed Brown 240c4762a1bSJed Brown test: 241c4762a1bSJed Brown suffix: 3_aijcusparse_1 242c4762a1bSJed Brown args: -mat_type mpiaijcusparse -vec_type cuda -test_diagonalscale 243c4762a1bSJed Brown filter: grep -v type 244c4762a1bSJed Brown output_file: output/ex5_31.out 245c4762a1bSJed Brown requires: cuda 246c4762a1bSJed Brown 247c4762a1bSJed Brown test: 248c4762a1bSJed Brown suffix: 3_aijcusparse_2 249c4762a1bSJed Brown nsize: 3 250c4762a1bSJed Brown args: -mat_type mpiaijcusparse -vec_type cuda -test_diagonalscale 251c4762a1bSJed Brown filter: grep -v type 252c4762a1bSJed Brown output_file: output/ex5_33.out 253c4762a1bSJed Brown requires: cuda 254c4762a1bSJed Brown 255c4762a1bSJed Brown test: 25635990778SJunchao Zhang suffix: 3_kokkos 25735990778SJunchao Zhang nsize: 3 25835990778SJunchao Zhang args: -mat_type mpiaijkokkos -vec_type kokkos -test_diagonalscale 25935990778SJunchao Zhang filter: grep -v type 26035990778SJunchao Zhang output_file: output/ex5_33.out 261dcfd994dSJunchao Zhang requires: kokkos_kernels 26235990778SJunchao Zhang 26335990778SJunchao Zhang test: 264c4762a1bSJed Brown suffix: aijcusparse_1 265c4762a1bSJed Brown args: -mat_type seqaijcusparse -vec_type cuda -rectA 266c4762a1bSJed Brown filter: grep -v type 267c4762a1bSJed Brown output_file: output/ex5_11_A.out 268c4762a1bSJed Brown requires: cuda 269c4762a1bSJed Brown 270c4762a1bSJed Brown test: 271c4762a1bSJed Brown suffix: aijcusparse_2 272c4762a1bSJed Brown args: -mat_type seqaijcusparse -vec_type cuda -rectB 273c4762a1bSJed Brown filter: grep -v type 274c4762a1bSJed Brown output_file: output/ex5_11_B.out 275c4762a1bSJed Brown requires: cuda 276c4762a1bSJed Brown 277c4762a1bSJed Brown test: 278c4762a1bSJed Brown suffix: sell_1 2795f9962eeSHong Zhang args: -mat_type sell -mat_sell_slice_height 8 280c4762a1bSJed Brown output_file: output/ex5_41.out 281c4762a1bSJed Brown 282c4762a1bSJed Brown test: 283c4762a1bSJed Brown suffix: sell_2 284c4762a1bSJed Brown nsize: 3 2855f9962eeSHong Zhang args: -mat_type sell -mat_sell_slice_height 8 286c4762a1bSJed Brown output_file: output/ex5_43.out 287c4762a1bSJed Brown 288c4762a1bSJed Brown test: 289c4762a1bSJed Brown suffix: sell_3 2905f9962eeSHong Zhang args: -mat_type sell -test_diagonalscale -mat_sell_slice_height 8 291c4762a1bSJed Brown output_file: output/ex5_51.out 292c4762a1bSJed Brown 293c4762a1bSJed Brown test: 294c4762a1bSJed Brown suffix: sell_4 295c4762a1bSJed Brown nsize: 3 2965f9962eeSHong Zhang args: -mat_type sell -test_diagonalscale -mat_sell_slice_height 8 297c4762a1bSJed Brown output_file: output/ex5_53.out 298c4762a1bSJed Brown 2992d1451d4SHong Zhang test: 3002d1451d4SHong Zhang suffix: sell_5 30190d2215bSHong Zhang nsize: 3 3025f9962eeSHong Zhang args: -mat_type sellcuda -vec_type cuda -test_diagonalscale -test_zeroentries 30390d2215bSHong Zhang output_file: output/ex5_55.out 3048711c661SHong Zhang requires: cuda !complex 3052d1451d4SHong Zhang 3065f9962eeSHong Zhang test: 3075f9962eeSHong Zhang suffix: sell_6 3085f9962eeSHong Zhang nsize: 3 3095f9962eeSHong Zhang args: -mat_type sellcuda -vec_type cuda -mat_sell_spmv_cuda_kernel {{1 2 3 4 5 6}} 3105f9962eeSHong Zhang output_file: output/ex5_56.out 3115f9962eeSHong Zhang requires: cuda !complex 3125f9962eeSHong Zhang 3135f9962eeSHong Zhang test: 3145f9962eeSHong Zhang suffix: sell_7 3155f9962eeSHong Zhang args: -m 32 -mat_type sellcuda -vec_type cuda -mat_sell_spmv_cuda_kernel {{0 7 9}} -mat_sell_spmv_cuda_blocky {{2 4 8 16 32}} 3165f9962eeSHong Zhang output_file: output/ex5_57.out 3175f9962eeSHong Zhang requires: cuda !complex !single 318773bf0f6SHong Zhang 319773bf0f6SHong Zhang test: 320773bf0f6SHong Zhang suffix: sell_8 321773bf0f6SHong Zhang nsize: 3 322773bf0f6SHong Zhang args: -mat_type sellhip -vec_type hip -test_diagonalscale -test_zeroentries 323773bf0f6SHong Zhang filter: sed -e "s/hip/cuda/g" 324773bf0f6SHong Zhang output_file: output/ex5_55.out 325773bf0f6SHong Zhang requires: hip !complex 326773bf0f6SHong Zhang 327773bf0f6SHong Zhang test: 328773bf0f6SHong Zhang suffix: sell_9 329773bf0f6SHong Zhang nsize: 3 330773bf0f6SHong Zhang args: -mat_type sellhip -vec_type hip -mat_sell_spmv_hip_kernel {{1 2 3 4 5 6}} 331773bf0f6SHong Zhang filter: sed -e "s/hip/cuda/g" 332773bf0f6SHong Zhang output_file: output/ex5_56.out 333773bf0f6SHong Zhang requires: hip !complex 334773bf0f6SHong Zhang 335773bf0f6SHong Zhang test: 336773bf0f6SHong Zhang suffix: sell_10 337773bf0f6SHong Zhang args: -m 32 -mat_type sellhip -vec_type hip -mat_sell_spmv_hip_kernel {{0 7 9}} -mat_sell_spmv_hip_blocky {{2 4 8 16 32}} 338773bf0f6SHong Zhang filter: sed -e "s/hip/cuda/g" 339773bf0f6SHong Zhang output_file: output/ex5_57.out 340773bf0f6SHong Zhang requires: hip !complex !single 341c4762a1bSJed Brown TEST*/ 342