1*c4762a1bSJed Brown static char help[] = "Test memory scalability of MatMatMult() for AIJ and DENSE matrices. \n\ 2*c4762a1bSJed Brown Modified from the code contributed by Ian Lin <iancclin@umich.edu> \n\n"; 3*c4762a1bSJed Brown 4*c4762a1bSJed Brown /* 5*c4762a1bSJed Brown Example: 6*c4762a1bSJed Brown mpiexec -n <np> ./ex33 -mem_view -matmatmult_Bbn <Bbn> 7*c4762a1bSJed Brown */ 8*c4762a1bSJed Brown 9*c4762a1bSJed Brown #include <petsc.h> 10*c4762a1bSJed Brown 11*c4762a1bSJed Brown PetscErrorCode Print_memory(PetscLogDouble mem) 12*c4762a1bSJed Brown { 13*c4762a1bSJed Brown PetscErrorCode ierr; 14*c4762a1bSJed Brown double max_mem,min_mem; 15*c4762a1bSJed Brown 16*c4762a1bSJed Brown PetscFunctionBeginUser; 17*c4762a1bSJed Brown ierr = MPI_Reduce(&mem, &max_mem, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);CHKERRQ(ierr); 18*c4762a1bSJed Brown ierr = MPI_Reduce(&mem, &min_mem, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);CHKERRQ(ierr); 19*c4762a1bSJed Brown max_mem = max_mem / 1024.0 / 1024.0; 20*c4762a1bSJed Brown min_mem = min_mem / 1024.0 / 1024.0; 21*c4762a1bSJed Brown ierr = PetscPrintf(MPI_COMM_WORLD, " max and min memory across all processors %.4f Mb, %.4f Mb.\n", (double)max_mem,(double)min_mem);CHKERRQ(ierr); 22*c4762a1bSJed Brown PetscFunctionReturn(0); 23*c4762a1bSJed Brown } 24*c4762a1bSJed Brown 25*c4762a1bSJed Brown /* 26*c4762a1bSJed Brown Illustrate how to use MPI derived data types. 27*c4762a1bSJed Brown It would save memory significantly. See MatMPIDenseScatter() 28*c4762a1bSJed Brown */ 29*c4762a1bSJed Brown PetscErrorCode TestMPIDerivedDataType() 30*c4762a1bSJed Brown { 31*c4762a1bSJed Brown PetscErrorCode ierr; 32*c4762a1bSJed Brown MPI_Datatype type1, type2,rtype1,rtype2; 33*c4762a1bSJed Brown PetscInt i,j; 34*c4762a1bSJed Brown PetscScalar buffer[24]; /* An array of 4 rows, 6 cols */ 35*c4762a1bSJed Brown MPI_Status status; 36*c4762a1bSJed Brown PetscMPIInt rank,size,disp[2]; 37*c4762a1bSJed Brown 38*c4762a1bSJed Brown PetscFunctionBeginUser; 39*c4762a1bSJed Brown ierr = MPI_Comm_size(MPI_COMM_WORLD, &size);CHKERRQ(ierr); 40*c4762a1bSJed Brown if (size < 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Must use at least 2 processors"); 41*c4762a1bSJed Brown ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank);CHKERRQ(ierr); 42*c4762a1bSJed Brown 43*c4762a1bSJed Brown if (rank == 0) { 44*c4762a1bSJed Brown /* proc[0] sends 2 rows to proc[1] */ 45*c4762a1bSJed Brown for (i=0; i<24; i++) buffer[i] = (PetscScalar)i; 46*c4762a1bSJed Brown 47*c4762a1bSJed Brown disp[0] = 0; disp[1] = 2; 48*c4762a1bSJed Brown ierr = MPI_Type_create_indexed_block(2, 1, (const PetscMPIInt *)disp, MPIU_SCALAR, &type1);CHKERRQ(ierr); 49*c4762a1bSJed Brown /* one column has 4 entries */ 50*c4762a1bSJed Brown ierr = MPI_Type_create_resized(type1,0,4*sizeof(PetscScalar),&type2);CHKERRQ(ierr); 51*c4762a1bSJed Brown ierr = MPI_Type_commit(&type2);CHKERRQ(ierr); 52*c4762a1bSJed Brown ierr = MPI_Send(buffer, 6, type2, 1, 123, MPI_COMM_WORLD);CHKERRQ(ierr); 53*c4762a1bSJed Brown 54*c4762a1bSJed Brown } else if (rank == 1) { 55*c4762a1bSJed Brown /* proc[1] receives 2 rows from proc[0], and put them into contiguous rows, starting at the row 1 (disp[0]) */ 56*c4762a1bSJed Brown PetscInt blen = 2; 57*c4762a1bSJed Brown for (i=0; i<24; i++) buffer[i] = 0.0; 58*c4762a1bSJed Brown 59*c4762a1bSJed Brown disp[0] = 1; 60*c4762a1bSJed Brown ierr = MPI_Type_create_indexed_block(1, blen, (const PetscMPIInt *)disp, MPIU_SCALAR, &rtype1);CHKERRQ(ierr); 61*c4762a1bSJed Brown ierr = MPI_Type_create_resized(rtype1, 0, 4*sizeof(PetscScalar), &rtype2);CHKERRQ(ierr); 62*c4762a1bSJed Brown 63*c4762a1bSJed Brown ierr = MPI_Type_commit(&rtype2);CHKERRQ(ierr); 64*c4762a1bSJed Brown ierr = MPI_Recv(buffer, 6, rtype2, 0, 123, MPI_COMM_WORLD, &status);CHKERRQ(ierr); 65*c4762a1bSJed Brown for (i=0; i<4; i++) { 66*c4762a1bSJed Brown for (j=0; j<6; j++) { 67*c4762a1bSJed Brown ierr = PetscPrintf(MPI_COMM_SELF," %g", (double)PetscRealPart(buffer[i+j*4])); 68*c4762a1bSJed Brown } 69*c4762a1bSJed Brown ierr = PetscPrintf(MPI_COMM_SELF,"\n");CHKERRQ(ierr); 70*c4762a1bSJed Brown } 71*c4762a1bSJed Brown } 72*c4762a1bSJed Brown 73*c4762a1bSJed Brown if (rank == 0) { 74*c4762a1bSJed Brown ierr = MPI_Type_free(&type1);CHKERRQ(ierr); 75*c4762a1bSJed Brown ierr = MPI_Type_free(&type2);CHKERRQ(ierr); 76*c4762a1bSJed Brown } else if (rank == 1) { 77*c4762a1bSJed Brown ierr = MPI_Type_free(&rtype1);CHKERRQ(ierr); 78*c4762a1bSJed Brown ierr = MPI_Type_free(&rtype2);CHKERRQ(ierr); 79*c4762a1bSJed Brown } 80*c4762a1bSJed Brown ierr = MPI_Barrier(MPI_COMM_WORLD);CHKERRQ(ierr); 81*c4762a1bSJed Brown PetscFunctionReturn(0); 82*c4762a1bSJed Brown } 83*c4762a1bSJed Brown 84*c4762a1bSJed Brown int main(int argc, char **args) 85*c4762a1bSJed Brown { 86*c4762a1bSJed Brown PetscInt mA = 2700,nX = 80,nz = 40; 87*c4762a1bSJed Brown /* PetscInt mA=6,nX=5,nz=2; //small test */ 88*c4762a1bSJed Brown PetscLogDouble mem; 89*c4762a1bSJed Brown Mat A,X,Y; 90*c4762a1bSJed Brown PetscErrorCode ierr; 91*c4762a1bSJed Brown PetscBool flg = PETSC_FALSE; 92*c4762a1bSJed Brown 93*c4762a1bSJed Brown ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; 94*c4762a1bSJed Brown ierr = PetscOptionsGetBool(NULL,NULL,"-test_mpiderivedtype",&flg,NULL);CHKERRQ(ierr); 95*c4762a1bSJed Brown if (flg) { 96*c4762a1bSJed Brown ierr = TestMPIDerivedDataType();CHKERRQ(ierr); 97*c4762a1bSJed Brown ierr = PetscFinalize(); 98*c4762a1bSJed Brown return ierr; 99*c4762a1bSJed Brown } 100*c4762a1bSJed Brown 101*c4762a1bSJed Brown ierr = PetscOptionsGetBool(NULL,NULL,"-mem_view",&flg,NULL);CHKERRQ(ierr); 102*c4762a1bSJed Brown ierr = PetscMemoryGetCurrentUsage(&mem);CHKERRQ(ierr); 103*c4762a1bSJed Brown if (flg) { 104*c4762a1bSJed Brown ierr = PetscPrintf(MPI_COMM_WORLD, "Before start,");CHKERRQ(ierr); 105*c4762a1bSJed Brown ierr = Print_memory(mem);CHKERRQ(ierr); 106*c4762a1bSJed Brown } 107*c4762a1bSJed Brown 108*c4762a1bSJed Brown ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,mA,mA,nz,PETSC_NULL,nz,PETSC_NULL,&A);CHKERRQ(ierr); 109*c4762a1bSJed Brown ierr = MatSetRandom(A,PETSC_NULL);CHKERRQ(ierr); 110*c4762a1bSJed Brown ierr = PetscMemoryGetCurrentUsage(&mem);CHKERRQ(ierr); 111*c4762a1bSJed Brown if (flg) { 112*c4762a1bSJed Brown ierr = PetscPrintf(MPI_COMM_WORLD, "After creating A,");CHKERRQ(ierr); 113*c4762a1bSJed Brown ierr = Print_memory(mem);CHKERRQ(ierr); 114*c4762a1bSJed Brown } 115*c4762a1bSJed Brown 116*c4762a1bSJed Brown ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,mA,nX,PETSC_NULL,&X);CHKERRQ(ierr); 117*c4762a1bSJed Brown ierr = MatSetRandom(X,PETSC_NULL);CHKERRQ(ierr); 118*c4762a1bSJed Brown ierr = PetscMemoryGetCurrentUsage(&mem);CHKERRQ(ierr); 119*c4762a1bSJed Brown if (flg) { 120*c4762a1bSJed Brown ierr = PetscPrintf(MPI_COMM_WORLD, "After creating X,");CHKERRQ(ierr); 121*c4762a1bSJed Brown ierr = Print_memory(mem);CHKERRQ(ierr); 122*c4762a1bSJed Brown } 123*c4762a1bSJed Brown 124*c4762a1bSJed Brown ierr = MatMatMult(A,X,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr); 125*c4762a1bSJed Brown ierr = PetscMemoryGetCurrentUsage(&mem);CHKERRQ(ierr); 126*c4762a1bSJed Brown if (flg) { 127*c4762a1bSJed Brown ierr = PetscPrintf(MPI_COMM_WORLD, "After MatMatMult,");CHKERRQ(ierr); 128*c4762a1bSJed Brown ierr = Print_memory(mem);CHKERRQ(ierr); 129*c4762a1bSJed Brown } 130*c4762a1bSJed Brown 131*c4762a1bSJed Brown /* Test reuse */ 132*c4762a1bSJed Brown ierr = MatMatMult(A,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr); 133*c4762a1bSJed Brown ierr = PetscMemoryGetCurrentUsage(&mem);CHKERRQ(ierr); 134*c4762a1bSJed Brown if (flg) { 135*c4762a1bSJed Brown ierr = PetscPrintf(MPI_COMM_WORLD, "After reuse MatMatMult,");CHKERRQ(ierr); 136*c4762a1bSJed Brown ierr = Print_memory(mem);CHKERRQ(ierr); 137*c4762a1bSJed Brown } 138*c4762a1bSJed Brown 139*c4762a1bSJed Brown /* Check accuracy */ 140*c4762a1bSJed Brown ierr = MatMatMultEqual(A,X,Y,10,&flg);CHKERRQ(ierr); 141*c4762a1bSJed Brown if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMETYPE,"Error in MatMatMult()"); 142*c4762a1bSJed Brown 143*c4762a1bSJed Brown ierr = MatDestroy(&A);CHKERRQ(ierr); 144*c4762a1bSJed Brown ierr = MatDestroy(&X);CHKERRQ(ierr); 145*c4762a1bSJed Brown ierr = MatDestroy(&Y);CHKERRQ(ierr); 146*c4762a1bSJed Brown 147*c4762a1bSJed Brown ierr = PetscFinalize(); 148*c4762a1bSJed Brown return ierr; 149*c4762a1bSJed Brown } 150*c4762a1bSJed Brown 151*c4762a1bSJed Brown /*TEST 152*c4762a1bSJed Brown 153*c4762a1bSJed Brown test: 154*c4762a1bSJed Brown suffix: 1 155*c4762a1bSJed Brown nsize: 4 156*c4762a1bSJed Brown output_file: output/ex33.out 157*c4762a1bSJed Brown 158*c4762a1bSJed Brown test: 159*c4762a1bSJed Brown suffix: 2 160*c4762a1bSJed Brown nsize: 8 161*c4762a1bSJed Brown output_file: output/ex33.out 162*c4762a1bSJed Brown 163*c4762a1bSJed Brown test: 164*c4762a1bSJed Brown suffix: 3 165*c4762a1bSJed Brown nsize: 2 166*c4762a1bSJed Brown args: -test_mpiderivedtype 167*c4762a1bSJed Brown output_file: output/ex33_3.out 168*c4762a1bSJed Brown 169*c4762a1bSJed Brown TEST*/ 170