xref: /petsc/src/mat/tests/ex33.c (revision b122ec5aa1bd4469eb4e0673542fb7de3f411254)
1c4762a1bSJed Brown static char help[] = "Test memory scalability of MatMatMult() for AIJ and DENSE matrices. \n\
2c4762a1bSJed Brown Modified from the code contributed by Ian Lin <iancclin@umich.edu> \n\n";
3c4762a1bSJed Brown 
4c4762a1bSJed Brown /*
5c4762a1bSJed Brown Example:
6c4762a1bSJed Brown   mpiexec -n <np> ./ex33 -mem_view -matmatmult_Bbn <Bbn>
7c4762a1bSJed Brown */
8c4762a1bSJed Brown 
9c4762a1bSJed Brown #include <petsc.h>
10c4762a1bSJed Brown 
11c4762a1bSJed Brown PetscErrorCode Print_memory(PetscLogDouble mem)
12c4762a1bSJed Brown {
13c4762a1bSJed Brown   double         max_mem,min_mem;
14c4762a1bSJed Brown 
15c4762a1bSJed Brown   PetscFunctionBeginUser;
165f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Reduce(&mem, &max_mem, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD));
175f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Reduce(&mem, &min_mem, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD));
18c4762a1bSJed Brown   max_mem = max_mem / 1024.0 / 1024.0;
19c4762a1bSJed Brown   min_mem = min_mem / 1024.0 / 1024.0;
205f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscPrintf(MPI_COMM_WORLD, " max and min memory across all processors %.4f Mb, %.4f Mb.\n", (double)max_mem,(double)min_mem));
21c4762a1bSJed Brown   PetscFunctionReturn(0);
22c4762a1bSJed Brown }
23c4762a1bSJed Brown 
24c4762a1bSJed Brown /*
25c4762a1bSJed Brown    Illustrate how to use MPI derived data types.
26c4762a1bSJed Brown    It would save memory significantly. See MatMPIDenseScatter()
27c4762a1bSJed Brown */
28c4762a1bSJed Brown PetscErrorCode TestMPIDerivedDataType()
29c4762a1bSJed Brown {
30c4762a1bSJed Brown   MPI_Datatype      type1, type2,rtype1,rtype2;
31c4762a1bSJed Brown   PetscInt          i,j;
32c4762a1bSJed Brown   PetscScalar       buffer[24]; /* An array of 4 rows, 6 cols */
33c4762a1bSJed Brown   MPI_Status        status;
34c4762a1bSJed Brown   PetscMPIInt       rank,size,disp[2];
35c4762a1bSJed Brown 
36c4762a1bSJed Brown   PetscFunctionBeginUser;
375f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
382c71b3e2SJacob Faibussowitsch   PetscCheckFalse(size < 2,PETSC_COMM_SELF,PETSC_ERR_SUP,"Must use at least 2 processors");
395f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
40c4762a1bSJed Brown 
41c4762a1bSJed Brown   if (rank == 0) {
42c4762a1bSJed Brown     /* proc[0] sends 2 rows to proc[1] */
43c4762a1bSJed Brown     for (i=0; i<24; i++) buffer[i] = (PetscScalar)i;
44c4762a1bSJed Brown 
45c4762a1bSJed Brown     disp[0] = 0;  disp[1] = 2;
465f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Type_create_indexed_block(2, 1, (const PetscMPIInt *)disp, MPIU_SCALAR, &type1));
47c4762a1bSJed Brown     /* one column has 4 entries */
485f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Type_create_resized(type1,0,4*sizeof(PetscScalar),&type2));
495f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Type_commit(&type2));
505f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Send(buffer, 6, type2, 1, 123, MPI_COMM_WORLD));
51c4762a1bSJed Brown 
52c4762a1bSJed Brown   } else if (rank == 1) {
53c4762a1bSJed Brown     /* proc[1] receives 2 rows from proc[0], and put them into contiguous rows, starting at the row 1 (disp[0]) */
54c4762a1bSJed Brown     PetscInt blen = 2;
55c4762a1bSJed Brown     for (i=0; i<24; i++) buffer[i] = 0.0;
56c4762a1bSJed Brown 
57c4762a1bSJed Brown     disp[0] = 1;
585f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Type_create_indexed_block(1, blen, (const PetscMPIInt *)disp, MPIU_SCALAR, &rtype1));
595f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Type_create_resized(rtype1, 0, 4*sizeof(PetscScalar), &rtype2));
60c4762a1bSJed Brown 
615f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Type_commit(&rtype2));
625f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Recv(buffer, 6, rtype2, 0, 123, MPI_COMM_WORLD, &status));
63c4762a1bSJed Brown     for (i=0; i<4; i++) {
64c4762a1bSJed Brown       for (j=0; j<6; j++) {
655f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscPrintf(MPI_COMM_SELF,"  %g", (double)PetscRealPart(buffer[i+j*4])));
66c4762a1bSJed Brown       }
675f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscPrintf(MPI_COMM_SELF,"\n"));
68c4762a1bSJed Brown     }
69c4762a1bSJed Brown   }
70c4762a1bSJed Brown 
71c4762a1bSJed Brown   if (rank == 0) {
725f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Type_free(&type1));
735f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Type_free(&type2));
74c4762a1bSJed Brown   } else if (rank == 1) {
755f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Type_free(&rtype1));
765f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Type_free(&rtype2));
77c4762a1bSJed Brown   }
785f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Barrier(MPI_COMM_WORLD));
79c4762a1bSJed Brown   PetscFunctionReturn(0);
80c4762a1bSJed Brown }
81c4762a1bSJed Brown 
82c4762a1bSJed Brown int main(int argc, char **args)
83c4762a1bSJed Brown {
84c4762a1bSJed Brown   PetscInt          mA = 2700,nX = 80,nz = 40;
85c4762a1bSJed Brown   /* PetscInt        mA=6,nX=5,nz=2; //small test */
86c4762a1bSJed Brown   PetscLogDouble    mem;
87c4762a1bSJed Brown   Mat               A,X,Y;
88c4762a1bSJed Brown   PetscBool         flg = PETSC_FALSE;
89c4762a1bSJed Brown 
90*b122ec5aSJacob Faibussowitsch   CHKERRQ(PetscInitialize(&argc,&args,(char*)0,help));
915f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsGetBool(NULL,NULL,"-test_mpiderivedtype",&flg,NULL));
92c4762a1bSJed Brown   if (flg) {
935f80ce2aSJacob Faibussowitsch     CHKERRQ(TestMPIDerivedDataType());
94*b122ec5aSJacob Faibussowitsch     CHKERRQ(PetscFinalize());
95*b122ec5aSJacob Faibussowitsch     return 0;
96c4762a1bSJed Brown   }
97c4762a1bSJed Brown 
985f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsGetBool(NULL,NULL,"-mem_view",&flg,NULL));
995f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMemoryGetCurrentUsage(&mem));
100c4762a1bSJed Brown   if (flg) {
1015f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscPrintf(MPI_COMM_WORLD, "Before start,"));
1025f80ce2aSJacob Faibussowitsch     CHKERRQ(Print_memory(mem));
103c4762a1bSJed Brown   }
104c4762a1bSJed Brown 
1055f80ce2aSJacob Faibussowitsch   CHKERRQ(MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,mA,mA,nz,PETSC_NULL,nz,PETSC_NULL,&A));
1065f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetRandom(A,PETSC_NULL));
1075f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMemoryGetCurrentUsage(&mem));
108c4762a1bSJed Brown   if (flg) {
1095f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscPrintf(MPI_COMM_WORLD, "After creating A,"));
1105f80ce2aSJacob Faibussowitsch     CHKERRQ(Print_memory(mem));
111c4762a1bSJed Brown   }
112c4762a1bSJed Brown 
1135f80ce2aSJacob Faibussowitsch   CHKERRQ(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,mA,nX,PETSC_NULL,&X));
1145f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetRandom(X,PETSC_NULL));
1155f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMemoryGetCurrentUsage(&mem));
116c4762a1bSJed Brown   if (flg) {
1175f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscPrintf(MPI_COMM_WORLD, "After creating X,"));
1185f80ce2aSJacob Faibussowitsch     CHKERRQ(Print_memory(mem));
119c4762a1bSJed Brown   }
120c4762a1bSJed Brown 
1215f80ce2aSJacob Faibussowitsch   CHKERRQ(MatMatMult(A,X,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Y));
1225f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMemoryGetCurrentUsage(&mem));
123c4762a1bSJed Brown   if (flg) {
1245f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscPrintf(MPI_COMM_WORLD, "After MatMatMult,"));
1255f80ce2aSJacob Faibussowitsch     CHKERRQ(Print_memory(mem));
126c4762a1bSJed Brown   }
127c4762a1bSJed Brown 
128c4762a1bSJed Brown   /* Test reuse */
1295f80ce2aSJacob Faibussowitsch   CHKERRQ(MatMatMult(A,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y));
1305f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMemoryGetCurrentUsage(&mem));
131c4762a1bSJed Brown   if (flg) {
1325f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscPrintf(MPI_COMM_WORLD, "After reuse MatMatMult,"));
1335f80ce2aSJacob Faibussowitsch     CHKERRQ(Print_memory(mem));
134c4762a1bSJed Brown   }
135c4762a1bSJed Brown 
136c4762a1bSJed Brown   /* Check accuracy */
1375f80ce2aSJacob Faibussowitsch   CHKERRQ(MatMatMultEqual(A,X,Y,10,&flg));
13828b400f6SJacob Faibussowitsch   PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMETYPE,"Error in MatMatMult()");
139c4762a1bSJed Brown 
1405f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&A));
1415f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&X));
1425f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&Y));
143c4762a1bSJed Brown 
144*b122ec5aSJacob Faibussowitsch   CHKERRQ(PetscFinalize());
145*b122ec5aSJacob Faibussowitsch   return 0;
146c4762a1bSJed Brown }
147c4762a1bSJed Brown 
148c4762a1bSJed Brown /*TEST
149c4762a1bSJed Brown 
150c4762a1bSJed Brown    test:
151c4762a1bSJed Brown       suffix: 1
152c4762a1bSJed Brown       nsize: 4
153c4762a1bSJed Brown       output_file: output/ex33.out
154c4762a1bSJed Brown 
155c4762a1bSJed Brown    test:
156c4762a1bSJed Brown       suffix: 2
157c4762a1bSJed Brown       nsize: 8
158c4762a1bSJed Brown       output_file: output/ex33.out
159c4762a1bSJed Brown 
160c4762a1bSJed Brown    test:
161c4762a1bSJed Brown       suffix: 3
162c4762a1bSJed Brown       nsize: 2
163c4762a1bSJed Brown       args: -test_mpiderivedtype
164c4762a1bSJed Brown       output_file: output/ex33_3.out
165c4762a1bSJed Brown 
166c4762a1bSJed Brown TEST*/
167