1c4762a1bSJed Brown static char help[] = "Test memory scalability of MatMatMult() for AIJ and DENSE matrices. \n\ 2c4762a1bSJed Brown Modified from the code contributed by Ian Lin <iancclin@umich.edu> \n\n"; 3c4762a1bSJed Brown 4c4762a1bSJed Brown /* 5c4762a1bSJed Brown Example: 6*1e2d28a0SPierre Jolivet mpiexec -n <np> ./ex33 -mem_view -matproduct_batch_size <Bbn> 7c4762a1bSJed Brown */ 8c4762a1bSJed Brown 9c4762a1bSJed Brown #include <petsc.h> 10c4762a1bSJed Brown 11d71ae5a4SJacob Faibussowitsch PetscErrorCode Print_memory(PetscLogDouble mem) 12d71ae5a4SJacob Faibussowitsch { 13c4762a1bSJed Brown double max_mem, min_mem; 14c4762a1bSJed Brown 15c4762a1bSJed Brown PetscFunctionBeginUser; 169566063dSJacob Faibussowitsch PetscCallMPI(MPI_Reduce(&mem, &max_mem, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD)); 179566063dSJacob Faibussowitsch PetscCallMPI(MPI_Reduce(&mem, &min_mem, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD)); 18c4762a1bSJed Brown max_mem = max_mem / 1024.0 / 1024.0; 19c4762a1bSJed Brown min_mem = min_mem / 1024.0 / 1024.0; 209566063dSJacob Faibussowitsch PetscCall(PetscPrintf(MPI_COMM_WORLD, " max and min memory across all processors %.4f Mb, %.4f Mb.\n", (double)max_mem, (double)min_mem)); 213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 22c4762a1bSJed Brown } 23c4762a1bSJed Brown 24c4762a1bSJed Brown /* 25c4762a1bSJed Brown Illustrate how to use MPI derived data types. 26c4762a1bSJed Brown It would save memory significantly. See MatMPIDenseScatter() 27c4762a1bSJed Brown */ 28d71ae5a4SJacob Faibussowitsch PetscErrorCode TestMPIDerivedDataType() 29d71ae5a4SJacob Faibussowitsch { 30c4762a1bSJed Brown MPI_Datatype type1, type2, rtype1, rtype2; 31c4762a1bSJed Brown PetscInt i, j; 32c4762a1bSJed Brown PetscScalar buffer[24]; /* An array of 4 rows, 6 cols */ 33c4762a1bSJed Brown MPI_Status status; 34c4762a1bSJed Brown PetscMPIInt rank, size, disp[2]; 35c4762a1bSJed Brown 36c4762a1bSJed Brown PetscFunctionBeginUser; 379566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 38be096a46SBarry Smith PetscCheck(size >= 2, PETSC_COMM_SELF, PETSC_ERR_WRONG_MPI_SIZE, "Must use at least 2 processors"); 399566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 40c4762a1bSJed Brown 41c4762a1bSJed Brown if (rank == 0) { 42c4762a1bSJed Brown /* proc[0] sends 2 rows to proc[1] */ 43c4762a1bSJed Brown for (i = 0; i < 24; i++) buffer[i] = (PetscScalar)i; 44c4762a1bSJed Brown 459371c9d4SSatish Balay disp[0] = 0; 469371c9d4SSatish Balay disp[1] = 2; 479566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_create_indexed_block(2, 1, (const PetscMPIInt *)disp, MPIU_SCALAR, &type1)); 48c4762a1bSJed Brown /* one column has 4 entries */ 499566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_create_resized(type1, 0, 4 * sizeof(PetscScalar), &type2)); 509566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_commit(&type2)); 519566063dSJacob Faibussowitsch PetscCallMPI(MPI_Send(buffer, 6, type2, 1, 123, MPI_COMM_WORLD)); 52c4762a1bSJed Brown 53c4762a1bSJed Brown } else if (rank == 1) { 54c4762a1bSJed Brown /* proc[1] receives 2 rows from proc[0], and put them into contiguous rows, starting at the row 1 (disp[0]) */ 55c4762a1bSJed Brown for (i = 0; i < 24; i++) buffer[i] = 0.0; 56c4762a1bSJed Brown 57c4762a1bSJed Brown disp[0] = 1; 586497c311SBarry Smith PetscCallMPI(MPI_Type_create_indexed_block(1, 2, (const PetscMPIInt *)disp, MPIU_SCALAR, &rtype1)); 599566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_create_resized(rtype1, 0, 4 * sizeof(PetscScalar), &rtype2)); 60c4762a1bSJed Brown 619566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_commit(&rtype2)); 629566063dSJacob Faibussowitsch PetscCallMPI(MPI_Recv(buffer, 6, rtype2, 0, 123, MPI_COMM_WORLD, &status)); 63c4762a1bSJed Brown for (i = 0; i < 4; i++) { 6448a46eb9SPierre Jolivet for (j = 0; j < 6; j++) PetscCall(PetscPrintf(MPI_COMM_SELF, " %g", (double)PetscRealPart(buffer[i + j * 4]))); 659566063dSJacob Faibussowitsch PetscCall(PetscPrintf(MPI_COMM_SELF, "\n")); 66c4762a1bSJed Brown } 67c4762a1bSJed Brown } 68c4762a1bSJed Brown 69c4762a1bSJed Brown if (rank == 0) { 709566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_free(&type1)); 719566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_free(&type2)); 72c4762a1bSJed Brown } else if (rank == 1) { 739566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_free(&rtype1)); 749566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_free(&rtype2)); 75c4762a1bSJed Brown } 769566063dSJacob Faibussowitsch PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD)); 773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 78c4762a1bSJed Brown } 79c4762a1bSJed Brown 80d71ae5a4SJacob Faibussowitsch int main(int argc, char **args) 81d71ae5a4SJacob Faibussowitsch { 82c4762a1bSJed Brown PetscInt mA = 2700, nX = 80, nz = 40; 83c4762a1bSJed Brown /* PetscInt mA=6,nX=5,nz=2; //small test */ 84c4762a1bSJed Brown PetscLogDouble mem; 85c4762a1bSJed Brown Mat A, X, Y; 86c4762a1bSJed Brown PetscBool flg = PETSC_FALSE; 87c4762a1bSJed Brown 88327415f7SBarry Smith PetscFunctionBeginUser; 89c8025a54SPierre Jolivet PetscCall(PetscInitialize(&argc, &args, NULL, help)); 909566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-test_mpiderivedtype", &flg, NULL)); 91c4762a1bSJed Brown if (flg) { 929566063dSJacob Faibussowitsch PetscCall(TestMPIDerivedDataType()); 939566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 94b122ec5aSJacob Faibussowitsch return 0; 95c4762a1bSJed Brown } 96c4762a1bSJed Brown 979566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-mem_view", &flg, NULL)); 989566063dSJacob Faibussowitsch PetscCall(PetscMemoryGetCurrentUsage(&mem)); 99c4762a1bSJed Brown if (flg) { 1009566063dSJacob Faibussowitsch PetscCall(PetscPrintf(MPI_COMM_WORLD, "Before start,")); 1019566063dSJacob Faibussowitsch PetscCall(Print_memory(mem)); 102c4762a1bSJed Brown } 103c4762a1bSJed Brown 104f3fa974cSJacob Faibussowitsch PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, mA, mA, nz, NULL, nz, NULL, &A)); 105f3fa974cSJacob Faibussowitsch PetscCall(MatSetRandom(A, NULL)); 1069566063dSJacob Faibussowitsch PetscCall(PetscMemoryGetCurrentUsage(&mem)); 107c4762a1bSJed Brown if (flg) { 1089566063dSJacob Faibussowitsch PetscCall(PetscPrintf(MPI_COMM_WORLD, "After creating A,")); 1099566063dSJacob Faibussowitsch PetscCall(Print_memory(mem)); 110c4762a1bSJed Brown } 111c4762a1bSJed Brown 112f3fa974cSJacob Faibussowitsch PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, mA, nX, NULL, &X)); 113f3fa974cSJacob Faibussowitsch PetscCall(MatSetRandom(X, NULL)); 1149566063dSJacob Faibussowitsch PetscCall(PetscMemoryGetCurrentUsage(&mem)); 115c4762a1bSJed Brown if (flg) { 1169566063dSJacob Faibussowitsch PetscCall(PetscPrintf(MPI_COMM_WORLD, "After creating X,")); 1179566063dSJacob Faibussowitsch PetscCall(Print_memory(mem)); 118c4762a1bSJed Brown } 119c4762a1bSJed Brown 120fb842aefSJose E. Roman PetscCall(MatMatMult(A, X, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &Y)); 1219566063dSJacob Faibussowitsch PetscCall(PetscMemoryGetCurrentUsage(&mem)); 122c4762a1bSJed Brown if (flg) { 1239566063dSJacob Faibussowitsch PetscCall(PetscPrintf(MPI_COMM_WORLD, "After MatMatMult,")); 1249566063dSJacob Faibussowitsch PetscCall(Print_memory(mem)); 125c4762a1bSJed Brown } 126c4762a1bSJed Brown 127c4762a1bSJed Brown /* Test reuse */ 128fb842aefSJose E. Roman PetscCall(MatMatMult(A, X, MAT_REUSE_MATRIX, PETSC_DETERMINE, &Y)); 1299566063dSJacob Faibussowitsch PetscCall(PetscMemoryGetCurrentUsage(&mem)); 130c4762a1bSJed Brown if (flg) { 1319566063dSJacob Faibussowitsch PetscCall(PetscPrintf(MPI_COMM_WORLD, "After reuse MatMatMult,")); 1329566063dSJacob Faibussowitsch PetscCall(Print_memory(mem)); 133c4762a1bSJed Brown } 134c4762a1bSJed Brown 135c4762a1bSJed Brown /* Check accuracy */ 1369566063dSJacob Faibussowitsch PetscCall(MatMatMultEqual(A, X, Y, 10, &flg)); 13728b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMETYPE, "Error in MatMatMult()"); 138c4762a1bSJed Brown 1399566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 1409566063dSJacob Faibussowitsch PetscCall(MatDestroy(&X)); 1419566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Y)); 142c4762a1bSJed Brown 1439566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 144b122ec5aSJacob Faibussowitsch return 0; 145c4762a1bSJed Brown } 146c4762a1bSJed Brown 147c4762a1bSJed Brown /*TEST 148c4762a1bSJed Brown 149c4762a1bSJed Brown test: 150c4762a1bSJed Brown suffix: 1 151c4762a1bSJed Brown nsize: 4 1523886731fSPierre Jolivet output_file: output/empty.out 153c4762a1bSJed Brown 154c4762a1bSJed Brown test: 155c4762a1bSJed Brown suffix: 2 156c4762a1bSJed Brown nsize: 8 1573886731fSPierre Jolivet output_file: output/empty.out 158c4762a1bSJed Brown 159c4762a1bSJed Brown test: 160c4762a1bSJed Brown suffix: 3 161c4762a1bSJed Brown nsize: 2 162c4762a1bSJed Brown args: -test_mpiderivedtype 163c4762a1bSJed Brown output_file: output/ex33_3.out 164c4762a1bSJed Brown 165c4762a1bSJed Brown TEST*/ 166