1*c4762a1bSJed Brown 2*c4762a1bSJed Brown static char help[] = "Tests MatMult(), MatMultAdd(), MatMultTranspose().\n\ 3*c4762a1bSJed Brown Also MatMultTransposeAdd(), MatScale(), MatGetDiagonal(), and MatDiagonalScale().\n\n"; 4*c4762a1bSJed Brown 5*c4762a1bSJed Brown #include <petscmat.h> 6*c4762a1bSJed Brown 7*c4762a1bSJed Brown int main(int argc,char **args) 8*c4762a1bSJed Brown { 9*c4762a1bSJed Brown Mat C; 10*c4762a1bSJed Brown Vec s,u,w,x,y,z; 11*c4762a1bSJed Brown PetscErrorCode ierr; 12*c4762a1bSJed Brown PetscInt i,j,m = 8,n,rstart,rend,vstart,vend; 13*c4762a1bSJed Brown PetscScalar one = 1.0,negone = -1.0,v,alpha=0.1; 14*c4762a1bSJed Brown PetscReal norm, tol = PETSC_SQRT_MACHINE_EPSILON; 15*c4762a1bSJed Brown PetscBool flg; 16*c4762a1bSJed Brown 17*c4762a1bSJed Brown ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; 18*c4762a1bSJed Brown ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_COMMON);CHKERRQ(ierr); 19*c4762a1bSJed Brown ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr); 20*c4762a1bSJed Brown n = m; 21*c4762a1bSJed Brown ierr = PetscOptionsHasName(NULL,NULL,"-rectA",&flg);CHKERRQ(ierr); 22*c4762a1bSJed Brown if (flg) n += 2; 23*c4762a1bSJed Brown ierr = PetscOptionsHasName(NULL,NULL,"-rectB",&flg);CHKERRQ(ierr); 24*c4762a1bSJed Brown if (flg) n -= 2; 25*c4762a1bSJed Brown 26*c4762a1bSJed Brown /* ---------- Assemble matrix and vectors ----------- */ 27*c4762a1bSJed Brown 28*c4762a1bSJed Brown ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr); 29*c4762a1bSJed Brown ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,m,n);CHKERRQ(ierr); 30*c4762a1bSJed Brown ierr = MatSetFromOptions(C);CHKERRQ(ierr); 31*c4762a1bSJed Brown ierr = MatSetUp(C);CHKERRQ(ierr); 32*c4762a1bSJed Brown ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr); 33*c4762a1bSJed Brown ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); 34*c4762a1bSJed Brown ierr = VecSetSizes(x,PETSC_DECIDE,m);CHKERRQ(ierr); 35*c4762a1bSJed Brown ierr = VecSetFromOptions(x);CHKERRQ(ierr); 36*c4762a1bSJed Brown ierr = VecDuplicate(x,&z);CHKERRQ(ierr); 37*c4762a1bSJed Brown ierr = VecDuplicate(x,&w);CHKERRQ(ierr); 38*c4762a1bSJed Brown ierr = VecCreate(PETSC_COMM_WORLD,&y);CHKERRQ(ierr); 39*c4762a1bSJed Brown ierr = VecSetSizes(y,PETSC_DECIDE,n);CHKERRQ(ierr); 40*c4762a1bSJed Brown ierr = VecSetFromOptions(y);CHKERRQ(ierr); 41*c4762a1bSJed Brown ierr = VecDuplicate(y,&u);CHKERRQ(ierr); 42*c4762a1bSJed Brown ierr = VecDuplicate(y,&s);CHKERRQ(ierr); 43*c4762a1bSJed Brown ierr = VecGetOwnershipRange(y,&vstart,&vend);CHKERRQ(ierr); 44*c4762a1bSJed Brown 45*c4762a1bSJed Brown /* Assembly */ 46*c4762a1bSJed Brown for (i=rstart; i<rend; i++) { 47*c4762a1bSJed Brown v = 100*(i+1); 48*c4762a1bSJed Brown ierr = VecSetValues(z,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); 49*c4762a1bSJed Brown for (j=0; j<n; j++) { 50*c4762a1bSJed Brown v = 10*(i+1)+j+1; 51*c4762a1bSJed Brown ierr = MatSetValues(C,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr); 52*c4762a1bSJed Brown } 53*c4762a1bSJed Brown } 54*c4762a1bSJed Brown 55*c4762a1bSJed Brown /* Flush off proc Vec values and do more assembly */ 56*c4762a1bSJed Brown ierr = VecAssemblyBegin(z);CHKERRQ(ierr); 57*c4762a1bSJed Brown for (i=vstart; i<vend; i++) { 58*c4762a1bSJed Brown v = one*((PetscReal)i); 59*c4762a1bSJed Brown ierr = VecSetValues(y,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); 60*c4762a1bSJed Brown v = 100.0*i; 61*c4762a1bSJed Brown ierr = VecSetValues(u,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); 62*c4762a1bSJed Brown } 63*c4762a1bSJed Brown 64*c4762a1bSJed Brown /* Flush off proc Mat values and do more assembly */ 65*c4762a1bSJed Brown ierr = MatAssemblyBegin(C,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 66*c4762a1bSJed Brown for (i=rstart; i<rend; i++) { 67*c4762a1bSJed Brown for (j=0; j<n; j++) { 68*c4762a1bSJed Brown v = 10*(i+1)+j+1; 69*c4762a1bSJed Brown ierr = MatSetValues(C,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr); 70*c4762a1bSJed Brown } 71*c4762a1bSJed Brown } 72*c4762a1bSJed Brown /* Try overlap Coomunication with the next stage XXXSetValues */ 73*c4762a1bSJed Brown ierr = VecAssemblyEnd(z);CHKERRQ(ierr); 74*c4762a1bSJed Brown 75*c4762a1bSJed Brown ierr = MatAssemblyEnd(C,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 76*c4762a1bSJed Brown CHKMEMQ; 77*c4762a1bSJed Brown /* The Assembly for the second Stage */ 78*c4762a1bSJed Brown ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 79*c4762a1bSJed Brown ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 80*c4762a1bSJed Brown ierr = VecAssemblyBegin(y);CHKERRQ(ierr); 81*c4762a1bSJed Brown ierr = VecAssemblyEnd(y);CHKERRQ(ierr); 82*c4762a1bSJed Brown ierr = MatScale(C,alpha);CHKERRQ(ierr); 83*c4762a1bSJed Brown ierr = VecAssemblyBegin(u);CHKERRQ(ierr); 84*c4762a1bSJed Brown ierr = VecAssemblyEnd(u);CHKERRQ(ierr); 85*c4762a1bSJed Brown ierr = PetscPrintf(PETSC_COMM_WORLD,"testing MatMult()\n");CHKERRQ(ierr); 86*c4762a1bSJed Brown CHKMEMQ; 87*c4762a1bSJed Brown ierr = MatMult(C,y,x);CHKERRQ(ierr); 88*c4762a1bSJed Brown CHKMEMQ; 89*c4762a1bSJed Brown ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 90*c4762a1bSJed Brown ierr = PetscPrintf(PETSC_COMM_WORLD,"testing MatMultAdd()\n");CHKERRQ(ierr); 91*c4762a1bSJed Brown ierr = MatMultAdd(C,y,z,w);CHKERRQ(ierr); 92*c4762a1bSJed Brown ierr = VecAXPY(x,one,z);CHKERRQ(ierr); 93*c4762a1bSJed Brown ierr = VecAXPY(x,negone,w);CHKERRQ(ierr); 94*c4762a1bSJed Brown ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); 95*c4762a1bSJed Brown if (norm > tol) { 96*c4762a1bSJed Brown ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error difference = %g\n",(double)norm);CHKERRQ(ierr); 97*c4762a1bSJed Brown } 98*c4762a1bSJed Brown 99*c4762a1bSJed Brown /* ------- Test MatMultTranspose(), MatMultTransposeAdd() ------- */ 100*c4762a1bSJed Brown 101*c4762a1bSJed Brown for (i=rstart; i<rend; i++) { 102*c4762a1bSJed Brown v = one*((PetscReal)i); 103*c4762a1bSJed Brown ierr = VecSetValues(x,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); 104*c4762a1bSJed Brown } 105*c4762a1bSJed Brown ierr = VecAssemblyBegin(x);CHKERRQ(ierr); 106*c4762a1bSJed Brown ierr = VecAssemblyEnd(x);CHKERRQ(ierr); 107*c4762a1bSJed Brown ierr = PetscPrintf(PETSC_COMM_WORLD,"testing MatMultTranspose()\n");CHKERRQ(ierr); 108*c4762a1bSJed Brown ierr = MatMultTranspose(C,x,y);CHKERRQ(ierr); 109*c4762a1bSJed Brown ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 110*c4762a1bSJed Brown 111*c4762a1bSJed Brown ierr = PetscPrintf(PETSC_COMM_WORLD,"testing MatMultTransposeAdd()\n");CHKERRQ(ierr); 112*c4762a1bSJed Brown ierr = MatMultTransposeAdd(C,x,u,s);CHKERRQ(ierr); 113*c4762a1bSJed Brown ierr = VecAXPY(y,one,u);CHKERRQ(ierr); 114*c4762a1bSJed Brown ierr = VecAXPY(y,negone,s);CHKERRQ(ierr); 115*c4762a1bSJed Brown ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); 116*c4762a1bSJed Brown if (norm > tol) { 117*c4762a1bSJed Brown ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error difference = %g\n",(double)norm);CHKERRQ(ierr); 118*c4762a1bSJed Brown } 119*c4762a1bSJed Brown 120*c4762a1bSJed Brown /* -------------------- Test MatGetDiagonal() ------------------ */ 121*c4762a1bSJed Brown 122*c4762a1bSJed Brown ierr = PetscPrintf(PETSC_COMM_WORLD,"testing MatGetDiagonal(), MatDiagonalScale()\n");CHKERRQ(ierr); 123*c4762a1bSJed Brown ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 124*c4762a1bSJed Brown ierr = VecSet(x,one);CHKERRQ(ierr); 125*c4762a1bSJed Brown ierr = MatGetDiagonal(C,x);CHKERRQ(ierr); 126*c4762a1bSJed Brown ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 127*c4762a1bSJed Brown for (i=vstart; i<vend; i++) { 128*c4762a1bSJed Brown v = one*((PetscReal)(i+1)); 129*c4762a1bSJed Brown ierr = VecSetValues(y,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); 130*c4762a1bSJed Brown } 131*c4762a1bSJed Brown 132*c4762a1bSJed Brown /* -------------------- Test () MatDiagonalScale ------------------ */ 133*c4762a1bSJed Brown ierr = PetscOptionsHasName(NULL,NULL,"-test_diagonalscale",&flg);CHKERRQ(ierr); 134*c4762a1bSJed Brown if (flg) { 135*c4762a1bSJed Brown ierr = MatDiagonalScale(C,x,y);CHKERRQ(ierr); 136*c4762a1bSJed Brown ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 137*c4762a1bSJed Brown } 138*c4762a1bSJed Brown /* Free data structures */ 139*c4762a1bSJed Brown ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = VecDestroy(&s);CHKERRQ(ierr); 140*c4762a1bSJed Brown ierr = VecDestroy(&w);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); 141*c4762a1bSJed Brown ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = VecDestroy(&z);CHKERRQ(ierr); 142*c4762a1bSJed Brown ierr = MatDestroy(&C);CHKERRQ(ierr); 143*c4762a1bSJed Brown 144*c4762a1bSJed Brown ierr = PetscFinalize(); 145*c4762a1bSJed Brown return ierr; 146*c4762a1bSJed Brown } 147*c4762a1bSJed Brown 148*c4762a1bSJed Brown 149*c4762a1bSJed Brown /*TEST 150*c4762a1bSJed Brown 151*c4762a1bSJed Brown 152*c4762a1bSJed Brown test: 153*c4762a1bSJed Brown suffix: 11_A 154*c4762a1bSJed Brown args: -mat_type seqaij -rectA 155*c4762a1bSJed Brown filter: grep -v type 156*c4762a1bSJed Brown 157*c4762a1bSJed Brown test: 158*c4762a1bSJed Brown args: -mat_type seqdense -rectA 159*c4762a1bSJed Brown suffix: 12_A 160*c4762a1bSJed Brown 161*c4762a1bSJed Brown test: 162*c4762a1bSJed Brown args: -mat_type seqaij -rectB 163*c4762a1bSJed Brown suffix: 11_B 164*c4762a1bSJed Brown filter: grep -v type 165*c4762a1bSJed Brown 166*c4762a1bSJed Brown test: 167*c4762a1bSJed Brown args: -mat_type seqdense -rectB 168*c4762a1bSJed Brown suffix: 12_B 169*c4762a1bSJed Brown 170*c4762a1bSJed Brown test: 171*c4762a1bSJed Brown suffix: 21 172*c4762a1bSJed Brown args: -mat_type mpiaij 173*c4762a1bSJed Brown filter: grep -v type 174*c4762a1bSJed Brown 175*c4762a1bSJed Brown test: 176*c4762a1bSJed Brown suffix: 22 177*c4762a1bSJed Brown args: -mat_type mpidense 178*c4762a1bSJed Brown 179*c4762a1bSJed Brown test: 180*c4762a1bSJed Brown suffix: 23 181*c4762a1bSJed Brown nsize: 3 182*c4762a1bSJed Brown args: -mat_type mpiaij 183*c4762a1bSJed Brown filter: grep -v type 184*c4762a1bSJed Brown 185*c4762a1bSJed Brown test: 186*c4762a1bSJed Brown suffix: 24 187*c4762a1bSJed Brown nsize: 3 188*c4762a1bSJed Brown args: -mat_type mpidense 189*c4762a1bSJed Brown 190*c4762a1bSJed Brown test: 191*c4762a1bSJed Brown suffix: 2_aijcusparse_1 192*c4762a1bSJed Brown args: -mat_type mpiaijcusparse -vec_type cuda 193*c4762a1bSJed Brown filter: grep -v type 194*c4762a1bSJed Brown output_file: output/ex5_21.out 195*c4762a1bSJed Brown requires: cuda 196*c4762a1bSJed Brown 197*c4762a1bSJed Brown 198*c4762a1bSJed Brown test: 199*c4762a1bSJed Brown nsize: 3 200*c4762a1bSJed Brown suffix: 2_aijcusparse_2 201*c4762a1bSJed Brown filter: grep -v type 202*c4762a1bSJed Brown args: -mat_type mpiaijcusparse -vec_type cuda 203*c4762a1bSJed Brown args: -sf_type {{basic neighbor}} -vecscatter_packongpu {{0 1}} -sf_use_pinned_buffer {{0 1}} 204*c4762a1bSJed Brown output_file: output/ex5_23.out 205*c4762a1bSJed Brown requires: cuda 206*c4762a1bSJed Brown 207*c4762a1bSJed Brown test: 208*c4762a1bSJed Brown nsize: 3 209*c4762a1bSJed Brown suffix: 2_aijcusparse_3 210*c4762a1bSJed Brown filter: grep -v type 211*c4762a1bSJed Brown args: -mat_type mpiaijcusparse -vec_type cuda 212*c4762a1bSJed Brown args: -sf_type {{basic neighbor}} -use_gpu_aware_mpi 213*c4762a1bSJed Brown output_file: output/ex5_23.out 214*c4762a1bSJed Brown requires: cuda define(PETSC_HAVE_MPI_GPU_AWARE) 215*c4762a1bSJed Brown 216*c4762a1bSJed Brown test: 217*c4762a1bSJed Brown suffix: 31 218*c4762a1bSJed Brown args: -mat_type mpiaij -test_diagonalscale 219*c4762a1bSJed Brown filter: grep -v type 220*c4762a1bSJed Brown 221*c4762a1bSJed Brown test: 222*c4762a1bSJed Brown suffix: 32 223*c4762a1bSJed Brown args: -mat_type mpibaij -test_diagonalscale 224*c4762a1bSJed Brown filter: grep -v Mat_ 225*c4762a1bSJed Brown 226*c4762a1bSJed Brown test: 227*c4762a1bSJed Brown suffix: 33 228*c4762a1bSJed Brown nsize: 3 229*c4762a1bSJed Brown args: -mat_type mpiaij -test_diagonalscale 230*c4762a1bSJed Brown filter: grep -v type 231*c4762a1bSJed Brown 232*c4762a1bSJed Brown test: 233*c4762a1bSJed Brown suffix: 34 234*c4762a1bSJed Brown nsize: 3 235*c4762a1bSJed Brown args: -mat_type mpibaij -test_diagonalscale 236*c4762a1bSJed Brown filter: grep -v Mat_ 237*c4762a1bSJed Brown 238*c4762a1bSJed Brown test: 239*c4762a1bSJed Brown suffix: 3_aijcusparse_1 240*c4762a1bSJed Brown args: -mat_type mpiaijcusparse -vec_type cuda -test_diagonalscale 241*c4762a1bSJed Brown filter: grep -v type 242*c4762a1bSJed Brown output_file: output/ex5_31.out 243*c4762a1bSJed Brown requires: cuda 244*c4762a1bSJed Brown 245*c4762a1bSJed Brown test: 246*c4762a1bSJed Brown suffix: 3_aijcusparse_2 247*c4762a1bSJed Brown nsize: 3 248*c4762a1bSJed Brown args: -mat_type mpiaijcusparse -vec_type cuda -test_diagonalscale 249*c4762a1bSJed Brown filter: grep -v type 250*c4762a1bSJed Brown output_file: output/ex5_33.out 251*c4762a1bSJed Brown requires: cuda 252*c4762a1bSJed Brown 253*c4762a1bSJed Brown test: 254*c4762a1bSJed Brown suffix: aijcusparse_1 255*c4762a1bSJed Brown args: -mat_type seqaijcusparse -vec_type cuda -rectA 256*c4762a1bSJed Brown filter: grep -v type 257*c4762a1bSJed Brown output_file: output/ex5_11_A.out 258*c4762a1bSJed Brown requires: cuda 259*c4762a1bSJed Brown 260*c4762a1bSJed Brown test: 261*c4762a1bSJed Brown suffix: aijcusparse_2 262*c4762a1bSJed Brown args: -mat_type seqaijcusparse -vec_type cuda -rectB 263*c4762a1bSJed Brown filter: grep -v type 264*c4762a1bSJed Brown output_file: output/ex5_11_B.out 265*c4762a1bSJed Brown requires: cuda 266*c4762a1bSJed Brown 267*c4762a1bSJed Brown test: 268*c4762a1bSJed Brown suffix: sell_1 269*c4762a1bSJed Brown args: -mat_type sell 270*c4762a1bSJed Brown output_file: output/ex5_41.out 271*c4762a1bSJed Brown 272*c4762a1bSJed Brown test: 273*c4762a1bSJed Brown suffix: sell_2 274*c4762a1bSJed Brown nsize: 3 275*c4762a1bSJed Brown args: -mat_type sell 276*c4762a1bSJed Brown output_file: output/ex5_43.out 277*c4762a1bSJed Brown 278*c4762a1bSJed Brown test: 279*c4762a1bSJed Brown suffix: sell_3 280*c4762a1bSJed Brown args: -mat_type sell -test_diagonalscale 281*c4762a1bSJed Brown output_file: output/ex5_51.out 282*c4762a1bSJed Brown 283*c4762a1bSJed Brown test: 284*c4762a1bSJed Brown suffix: sell_4 285*c4762a1bSJed Brown nsize: 3 286*c4762a1bSJed Brown args: -mat_type sell -test_diagonalscale 287*c4762a1bSJed Brown output_file: output/ex5_53.out 288*c4762a1bSJed Brown 289*c4762a1bSJed Brown TEST*/ 290