1c4762a1bSJed Brown static char help[] = "Test memory scalability of MatMatMult() for AIJ and DENSE matrices. \n\ 2c4762a1bSJed Brown Modified from the code contributed by Ian Lin <iancclin@umich.edu> \n\n"; 3c4762a1bSJed Brown 4c4762a1bSJed Brown /* 5c4762a1bSJed Brown Example: 6c4762a1bSJed Brown mpiexec -n <np> ./ex33 -mem_view -matmatmult_Bbn <Bbn> 7c4762a1bSJed Brown */ 8c4762a1bSJed Brown 9c4762a1bSJed Brown #include <petsc.h> 10c4762a1bSJed Brown 11c4762a1bSJed Brown PetscErrorCode Print_memory(PetscLogDouble mem) 12c4762a1bSJed Brown { 13c4762a1bSJed Brown double max_mem,min_mem; 14c4762a1bSJed Brown 15c4762a1bSJed Brown PetscFunctionBeginUser; 165f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Reduce(&mem, &max_mem, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD)); 175f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Reduce(&mem, &min_mem, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD)); 18c4762a1bSJed Brown max_mem = max_mem / 1024.0 / 1024.0; 19c4762a1bSJed Brown min_mem = min_mem / 1024.0 / 1024.0; 205f80ce2aSJacob Faibussowitsch CHKERRQ(PetscPrintf(MPI_COMM_WORLD, " max and min memory across all processors %.4f Mb, %.4f Mb.\n", (double)max_mem,(double)min_mem)); 21c4762a1bSJed Brown PetscFunctionReturn(0); 22c4762a1bSJed Brown } 23c4762a1bSJed Brown 24c4762a1bSJed Brown /* 25c4762a1bSJed Brown Illustrate how to use MPI derived data types. 26c4762a1bSJed Brown It would save memory significantly. See MatMPIDenseScatter() 27c4762a1bSJed Brown */ 28c4762a1bSJed Brown PetscErrorCode TestMPIDerivedDataType() 29c4762a1bSJed Brown { 30c4762a1bSJed Brown MPI_Datatype type1, type2,rtype1,rtype2; 31c4762a1bSJed Brown PetscInt i,j; 32c4762a1bSJed Brown PetscScalar buffer[24]; /* An array of 4 rows, 6 cols */ 33c4762a1bSJed Brown MPI_Status status; 34c4762a1bSJed Brown PetscMPIInt rank,size,disp[2]; 35c4762a1bSJed Brown 36c4762a1bSJed Brown PetscFunctionBeginUser; 375f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 382c71b3e2SJacob Faibussowitsch PetscCheckFalse(size < 2,PETSC_COMM_SELF,PETSC_ERR_SUP,"Must use at least 2 processors"); 395f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 40c4762a1bSJed Brown 41c4762a1bSJed Brown if (rank == 0) { 42c4762a1bSJed Brown /* proc[0] sends 2 rows to proc[1] */ 43c4762a1bSJed Brown for (i=0; i<24; i++) buffer[i] = (PetscScalar)i; 44c4762a1bSJed Brown 45c4762a1bSJed Brown disp[0] = 0; disp[1] = 2; 465f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Type_create_indexed_block(2, 1, (const PetscMPIInt *)disp, MPIU_SCALAR, &type1)); 47c4762a1bSJed Brown /* one column has 4 entries */ 485f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Type_create_resized(type1,0,4*sizeof(PetscScalar),&type2)); 495f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Type_commit(&type2)); 505f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Send(buffer, 6, type2, 1, 123, MPI_COMM_WORLD)); 51c4762a1bSJed Brown 52c4762a1bSJed Brown } else if (rank == 1) { 53c4762a1bSJed Brown /* proc[1] receives 2 rows from proc[0], and put them into contiguous rows, starting at the row 1 (disp[0]) */ 54c4762a1bSJed Brown PetscInt blen = 2; 55c4762a1bSJed Brown for (i=0; i<24; i++) buffer[i] = 0.0; 56c4762a1bSJed Brown 57c4762a1bSJed Brown disp[0] = 1; 585f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Type_create_indexed_block(1, blen, (const PetscMPIInt *)disp, MPIU_SCALAR, &rtype1)); 595f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Type_create_resized(rtype1, 0, 4*sizeof(PetscScalar), &rtype2)); 60c4762a1bSJed Brown 615f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Type_commit(&rtype2)); 625f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Recv(buffer, 6, rtype2, 0, 123, MPI_COMM_WORLD, &status)); 63c4762a1bSJed Brown for (i=0; i<4; i++) { 64c4762a1bSJed Brown for (j=0; j<6; j++) { 655f80ce2aSJacob Faibussowitsch CHKERRQ(PetscPrintf(MPI_COMM_SELF," %g", (double)PetscRealPart(buffer[i+j*4]))); 66c4762a1bSJed Brown } 675f80ce2aSJacob Faibussowitsch CHKERRQ(PetscPrintf(MPI_COMM_SELF,"\n")); 68c4762a1bSJed Brown } 69c4762a1bSJed Brown } 70c4762a1bSJed Brown 71c4762a1bSJed Brown if (rank == 0) { 725f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Type_free(&type1)); 735f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Type_free(&type2)); 74c4762a1bSJed Brown } else if (rank == 1) { 755f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Type_free(&rtype1)); 765f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Type_free(&rtype2)); 77c4762a1bSJed Brown } 785f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Barrier(MPI_COMM_WORLD)); 79c4762a1bSJed Brown PetscFunctionReturn(0); 80c4762a1bSJed Brown } 81c4762a1bSJed Brown 82c4762a1bSJed Brown int main(int argc, char **args) 83c4762a1bSJed Brown { 84c4762a1bSJed Brown PetscInt mA = 2700,nX = 80,nz = 40; 85c4762a1bSJed Brown /* PetscInt mA=6,nX=5,nz=2; //small test */ 86c4762a1bSJed Brown PetscLogDouble mem; 87c4762a1bSJed Brown Mat A,X,Y; 88c4762a1bSJed Brown PetscErrorCode ierr; 89c4762a1bSJed Brown PetscBool flg = PETSC_FALSE; 90c4762a1bSJed Brown 91c4762a1bSJed Brown ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; 925f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetBool(NULL,NULL,"-test_mpiderivedtype",&flg,NULL)); 93c4762a1bSJed Brown if (flg) { 945f80ce2aSJacob Faibussowitsch CHKERRQ(TestMPIDerivedDataType()); 95c4762a1bSJed Brown ierr = PetscFinalize(); 96c4762a1bSJed Brown return ierr; 97c4762a1bSJed Brown } 98c4762a1bSJed Brown 995f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetBool(NULL,NULL,"-mem_view",&flg,NULL)); 1005f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMemoryGetCurrentUsage(&mem)); 101c4762a1bSJed Brown if (flg) { 1025f80ce2aSJacob Faibussowitsch CHKERRQ(PetscPrintf(MPI_COMM_WORLD, "Before start,")); 1035f80ce2aSJacob Faibussowitsch CHKERRQ(Print_memory(mem)); 104c4762a1bSJed Brown } 105c4762a1bSJed Brown 1065f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,mA,mA,nz,PETSC_NULL,nz,PETSC_NULL,&A)); 1075f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetRandom(A,PETSC_NULL)); 1085f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMemoryGetCurrentUsage(&mem)); 109c4762a1bSJed Brown if (flg) { 1105f80ce2aSJacob Faibussowitsch CHKERRQ(PetscPrintf(MPI_COMM_WORLD, "After creating A,")); 1115f80ce2aSJacob Faibussowitsch CHKERRQ(Print_memory(mem)); 112c4762a1bSJed Brown } 113c4762a1bSJed Brown 1145f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,mA,nX,PETSC_NULL,&X)); 1155f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetRandom(X,PETSC_NULL)); 1165f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMemoryGetCurrentUsage(&mem)); 117c4762a1bSJed Brown if (flg) { 1185f80ce2aSJacob Faibussowitsch CHKERRQ(PetscPrintf(MPI_COMM_WORLD, "After creating X,")); 1195f80ce2aSJacob Faibussowitsch CHKERRQ(Print_memory(mem)); 120c4762a1bSJed Brown } 121c4762a1bSJed Brown 1225f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatMult(A,X,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Y)); 1235f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMemoryGetCurrentUsage(&mem)); 124c4762a1bSJed Brown if (flg) { 1255f80ce2aSJacob Faibussowitsch CHKERRQ(PetscPrintf(MPI_COMM_WORLD, "After MatMatMult,")); 1265f80ce2aSJacob Faibussowitsch CHKERRQ(Print_memory(mem)); 127c4762a1bSJed Brown } 128c4762a1bSJed Brown 129c4762a1bSJed Brown /* Test reuse */ 1305f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatMult(A,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y)); 1315f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMemoryGetCurrentUsage(&mem)); 132c4762a1bSJed Brown if (flg) { 1335f80ce2aSJacob Faibussowitsch CHKERRQ(PetscPrintf(MPI_COMM_WORLD, "After reuse MatMatMult,")); 1345f80ce2aSJacob Faibussowitsch CHKERRQ(Print_memory(mem)); 135c4762a1bSJed Brown } 136c4762a1bSJed Brown 137c4762a1bSJed Brown /* Check accuracy */ 1385f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatMultEqual(A,X,Y,10,&flg)); 139*28b400f6SJacob Faibussowitsch PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMETYPE,"Error in MatMatMult()"); 140c4762a1bSJed Brown 1415f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&A)); 1425f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&X)); 1435f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&Y)); 144c4762a1bSJed Brown 145c4762a1bSJed Brown ierr = PetscFinalize(); 146c4762a1bSJed Brown return ierr; 147c4762a1bSJed Brown } 148c4762a1bSJed Brown 149c4762a1bSJed Brown /*TEST 150c4762a1bSJed Brown 151c4762a1bSJed Brown test: 152c4762a1bSJed Brown suffix: 1 153c4762a1bSJed Brown nsize: 4 154c4762a1bSJed Brown output_file: output/ex33.out 155c4762a1bSJed Brown 156c4762a1bSJed Brown test: 157c4762a1bSJed Brown suffix: 2 158c4762a1bSJed Brown nsize: 8 159c4762a1bSJed Brown output_file: output/ex33.out 160c4762a1bSJed Brown 161c4762a1bSJed Brown test: 162c4762a1bSJed Brown suffix: 3 163c4762a1bSJed Brown nsize: 2 164c4762a1bSJed Brown args: -test_mpiderivedtype 165c4762a1bSJed Brown output_file: output/ex33_3.out 166c4762a1bSJed Brown 167c4762a1bSJed Brown TEST*/ 168