xref: /petsc/src/mat/tests/ex33.c (revision c4762a1b19cd2af06abeed90e8f9d34fb975dd94)
1*c4762a1bSJed Brown static char help[] = "Test memory scalability of MatMatMult() for AIJ and DENSE matrices. \n\
2*c4762a1bSJed Brown Modified from the code contributed by Ian Lin <iancclin@umich.edu> \n\n";
3*c4762a1bSJed Brown 
4*c4762a1bSJed Brown /*
5*c4762a1bSJed Brown Example:
6*c4762a1bSJed Brown   mpiexec -n <np> ./ex33 -mem_view -matmatmult_Bbn <Bbn>
7*c4762a1bSJed Brown */
8*c4762a1bSJed Brown 
9*c4762a1bSJed Brown #include <petsc.h>
10*c4762a1bSJed Brown 
11*c4762a1bSJed Brown PetscErrorCode Print_memory(PetscLogDouble mem)
12*c4762a1bSJed Brown {
13*c4762a1bSJed Brown   PetscErrorCode ierr;
14*c4762a1bSJed Brown   double         max_mem,min_mem;
15*c4762a1bSJed Brown 
16*c4762a1bSJed Brown   PetscFunctionBeginUser;
17*c4762a1bSJed Brown   ierr = MPI_Reduce(&mem, &max_mem, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);CHKERRQ(ierr);
18*c4762a1bSJed Brown   ierr = MPI_Reduce(&mem, &min_mem, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);CHKERRQ(ierr);
19*c4762a1bSJed Brown   max_mem = max_mem / 1024.0 / 1024.0;
20*c4762a1bSJed Brown   min_mem = min_mem / 1024.0 / 1024.0;
21*c4762a1bSJed Brown   ierr = PetscPrintf(MPI_COMM_WORLD, " max and min memory across all processors %.4f Mb, %.4f Mb.\n", (double)max_mem,(double)min_mem);CHKERRQ(ierr);
22*c4762a1bSJed Brown   PetscFunctionReturn(0);
23*c4762a1bSJed Brown }
24*c4762a1bSJed Brown 
25*c4762a1bSJed Brown /*
26*c4762a1bSJed Brown    Illustrate how to use MPI derived data types.
27*c4762a1bSJed Brown    It would save memory significantly. See MatMPIDenseScatter()
28*c4762a1bSJed Brown */
29*c4762a1bSJed Brown PetscErrorCode TestMPIDerivedDataType()
30*c4762a1bSJed Brown {
31*c4762a1bSJed Brown   PetscErrorCode    ierr;
32*c4762a1bSJed Brown   MPI_Datatype      type1, type2,rtype1,rtype2;
33*c4762a1bSJed Brown   PetscInt          i,j;
34*c4762a1bSJed Brown   PetscScalar       buffer[24]; /* An array of 4 rows, 6 cols */
35*c4762a1bSJed Brown   MPI_Status        status;
36*c4762a1bSJed Brown   PetscMPIInt       rank,size,disp[2];
37*c4762a1bSJed Brown 
38*c4762a1bSJed Brown   PetscFunctionBeginUser;
39*c4762a1bSJed Brown   ierr = MPI_Comm_size(MPI_COMM_WORLD, &size);CHKERRQ(ierr);
40*c4762a1bSJed Brown   if (size < 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Must use at least 2 processors");
41*c4762a1bSJed Brown   ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank);CHKERRQ(ierr);
42*c4762a1bSJed Brown 
43*c4762a1bSJed Brown   if (rank == 0) {
44*c4762a1bSJed Brown     /* proc[0] sends 2 rows to proc[1] */
45*c4762a1bSJed Brown     for (i=0; i<24; i++) buffer[i] = (PetscScalar)i;
46*c4762a1bSJed Brown 
47*c4762a1bSJed Brown     disp[0] = 0;  disp[1] = 2;
48*c4762a1bSJed Brown     ierr = MPI_Type_create_indexed_block(2, 1, (const PetscMPIInt *)disp, MPIU_SCALAR, &type1);CHKERRQ(ierr);
49*c4762a1bSJed Brown     /* one column has 4 entries */
50*c4762a1bSJed Brown     ierr = MPI_Type_create_resized(type1,0,4*sizeof(PetscScalar),&type2);CHKERRQ(ierr);
51*c4762a1bSJed Brown     ierr = MPI_Type_commit(&type2);CHKERRQ(ierr);
52*c4762a1bSJed Brown     ierr = MPI_Send(buffer, 6, type2, 1, 123, MPI_COMM_WORLD);CHKERRQ(ierr);
53*c4762a1bSJed Brown 
54*c4762a1bSJed Brown   } else if (rank == 1) {
55*c4762a1bSJed Brown     /* proc[1] receives 2 rows from proc[0], and put them into contiguous rows, starting at the row 1 (disp[0]) */
56*c4762a1bSJed Brown     PetscInt blen = 2;
57*c4762a1bSJed Brown     for (i=0; i<24; i++) buffer[i] = 0.0;
58*c4762a1bSJed Brown 
59*c4762a1bSJed Brown     disp[0] = 1;
60*c4762a1bSJed Brown     ierr = MPI_Type_create_indexed_block(1, blen, (const PetscMPIInt *)disp, MPIU_SCALAR, &rtype1);CHKERRQ(ierr);
61*c4762a1bSJed Brown     ierr = MPI_Type_create_resized(rtype1, 0, 4*sizeof(PetscScalar), &rtype2);CHKERRQ(ierr);
62*c4762a1bSJed Brown 
63*c4762a1bSJed Brown     ierr = MPI_Type_commit(&rtype2);CHKERRQ(ierr);
64*c4762a1bSJed Brown     ierr = MPI_Recv(buffer, 6, rtype2, 0, 123, MPI_COMM_WORLD, &status);CHKERRQ(ierr);
65*c4762a1bSJed Brown     for (i=0; i<4; i++) {
66*c4762a1bSJed Brown       for (j=0; j<6; j++) {
67*c4762a1bSJed Brown         ierr = PetscPrintf(MPI_COMM_SELF,"  %g", (double)PetscRealPart(buffer[i+j*4]));
68*c4762a1bSJed Brown       }
69*c4762a1bSJed Brown       ierr = PetscPrintf(MPI_COMM_SELF,"\n");CHKERRQ(ierr);
70*c4762a1bSJed Brown     }
71*c4762a1bSJed Brown   }
72*c4762a1bSJed Brown 
73*c4762a1bSJed Brown   if (rank == 0) {
74*c4762a1bSJed Brown     ierr = MPI_Type_free(&type1);CHKERRQ(ierr);
75*c4762a1bSJed Brown     ierr = MPI_Type_free(&type2);CHKERRQ(ierr);
76*c4762a1bSJed Brown   } else if (rank == 1) {
77*c4762a1bSJed Brown     ierr = MPI_Type_free(&rtype1);CHKERRQ(ierr);
78*c4762a1bSJed Brown     ierr = MPI_Type_free(&rtype2);CHKERRQ(ierr);
79*c4762a1bSJed Brown   }
80*c4762a1bSJed Brown   ierr = MPI_Barrier(MPI_COMM_WORLD);CHKERRQ(ierr);
81*c4762a1bSJed Brown   PetscFunctionReturn(0);
82*c4762a1bSJed Brown }
83*c4762a1bSJed Brown 
84*c4762a1bSJed Brown int main(int argc, char **args)
85*c4762a1bSJed Brown {
86*c4762a1bSJed Brown   PetscInt          mA = 2700,nX = 80,nz = 40;
87*c4762a1bSJed Brown   /* PetscInt        mA=6,nX=5,nz=2; //small test */
88*c4762a1bSJed Brown   PetscLogDouble    mem;
89*c4762a1bSJed Brown   Mat               A,X,Y;
90*c4762a1bSJed Brown   PetscErrorCode    ierr;
91*c4762a1bSJed Brown   PetscBool         flg = PETSC_FALSE;
92*c4762a1bSJed Brown 
93*c4762a1bSJed Brown   ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
94*c4762a1bSJed Brown   ierr = PetscOptionsGetBool(NULL,NULL,"-test_mpiderivedtype",&flg,NULL);CHKERRQ(ierr);
95*c4762a1bSJed Brown   if (flg) {
96*c4762a1bSJed Brown     ierr = TestMPIDerivedDataType();CHKERRQ(ierr);
97*c4762a1bSJed Brown     ierr = PetscFinalize();
98*c4762a1bSJed Brown     return ierr;
99*c4762a1bSJed Brown   }
100*c4762a1bSJed Brown 
101*c4762a1bSJed Brown   ierr = PetscOptionsGetBool(NULL,NULL,"-mem_view",&flg,NULL);CHKERRQ(ierr);
102*c4762a1bSJed Brown   ierr = PetscMemoryGetCurrentUsage(&mem);CHKERRQ(ierr);
103*c4762a1bSJed Brown   if (flg) {
104*c4762a1bSJed Brown     ierr = PetscPrintf(MPI_COMM_WORLD, "Before start,");CHKERRQ(ierr);
105*c4762a1bSJed Brown     ierr = Print_memory(mem);CHKERRQ(ierr);
106*c4762a1bSJed Brown   }
107*c4762a1bSJed Brown 
108*c4762a1bSJed Brown   ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,mA,mA,nz,PETSC_NULL,nz,PETSC_NULL,&A);CHKERRQ(ierr);
109*c4762a1bSJed Brown   ierr = MatSetRandom(A,PETSC_NULL);CHKERRQ(ierr);
110*c4762a1bSJed Brown   ierr = PetscMemoryGetCurrentUsage(&mem);CHKERRQ(ierr);
111*c4762a1bSJed Brown   if (flg) {
112*c4762a1bSJed Brown     ierr = PetscPrintf(MPI_COMM_WORLD, "After creating A,");CHKERRQ(ierr);
113*c4762a1bSJed Brown     ierr = Print_memory(mem);CHKERRQ(ierr);
114*c4762a1bSJed Brown   }
115*c4762a1bSJed Brown 
116*c4762a1bSJed Brown   ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,mA,nX,PETSC_NULL,&X);CHKERRQ(ierr);
117*c4762a1bSJed Brown   ierr = MatSetRandom(X,PETSC_NULL);CHKERRQ(ierr);
118*c4762a1bSJed Brown   ierr = PetscMemoryGetCurrentUsage(&mem);CHKERRQ(ierr);
119*c4762a1bSJed Brown   if (flg) {
120*c4762a1bSJed Brown     ierr = PetscPrintf(MPI_COMM_WORLD, "After creating X,");CHKERRQ(ierr);
121*c4762a1bSJed Brown     ierr = Print_memory(mem);CHKERRQ(ierr);
122*c4762a1bSJed Brown   }
123*c4762a1bSJed Brown 
124*c4762a1bSJed Brown   ierr = MatMatMult(A,X,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr);
125*c4762a1bSJed Brown   ierr = PetscMemoryGetCurrentUsage(&mem);CHKERRQ(ierr);
126*c4762a1bSJed Brown   if (flg) {
127*c4762a1bSJed Brown     ierr = PetscPrintf(MPI_COMM_WORLD, "After MatMatMult,");CHKERRQ(ierr);
128*c4762a1bSJed Brown     ierr = Print_memory(mem);CHKERRQ(ierr);
129*c4762a1bSJed Brown   }
130*c4762a1bSJed Brown 
131*c4762a1bSJed Brown   /* Test reuse */
132*c4762a1bSJed Brown   ierr = MatMatMult(A,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr);
133*c4762a1bSJed Brown   ierr = PetscMemoryGetCurrentUsage(&mem);CHKERRQ(ierr);
134*c4762a1bSJed Brown   if (flg) {
135*c4762a1bSJed Brown     ierr = PetscPrintf(MPI_COMM_WORLD, "After reuse MatMatMult,");CHKERRQ(ierr);
136*c4762a1bSJed Brown     ierr = Print_memory(mem);CHKERRQ(ierr);
137*c4762a1bSJed Brown   }
138*c4762a1bSJed Brown 
139*c4762a1bSJed Brown   /* Check accuracy */
140*c4762a1bSJed Brown   ierr = MatMatMultEqual(A,X,Y,10,&flg);CHKERRQ(ierr);
141*c4762a1bSJed Brown   if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMETYPE,"Error in MatMatMult()");
142*c4762a1bSJed Brown 
143*c4762a1bSJed Brown   ierr = MatDestroy(&A);CHKERRQ(ierr);
144*c4762a1bSJed Brown   ierr = MatDestroy(&X);CHKERRQ(ierr);
145*c4762a1bSJed Brown   ierr = MatDestroy(&Y);CHKERRQ(ierr);
146*c4762a1bSJed Brown 
147*c4762a1bSJed Brown   ierr = PetscFinalize();
148*c4762a1bSJed Brown   return ierr;
149*c4762a1bSJed Brown }
150*c4762a1bSJed Brown 
151*c4762a1bSJed Brown /*TEST
152*c4762a1bSJed Brown 
153*c4762a1bSJed Brown    test:
154*c4762a1bSJed Brown       suffix: 1
155*c4762a1bSJed Brown       nsize: 4
156*c4762a1bSJed Brown       output_file: output/ex33.out
157*c4762a1bSJed Brown 
158*c4762a1bSJed Brown    test:
159*c4762a1bSJed Brown       suffix: 2
160*c4762a1bSJed Brown       nsize: 8
161*c4762a1bSJed Brown       output_file: output/ex33.out
162*c4762a1bSJed Brown 
163*c4762a1bSJed Brown    test:
164*c4762a1bSJed Brown       suffix: 3
165*c4762a1bSJed Brown       nsize: 2
166*c4762a1bSJed Brown       args: -test_mpiderivedtype
167*c4762a1bSJed Brown       output_file: output/ex33_3.out
168*c4762a1bSJed Brown 
169*c4762a1bSJed Brown TEST*/
170