xref: /petsc/src/mat/tests/ex183.c (revision c4762a1b19cd2af06abeed90e8f9d34fb975dd94)
1*c4762a1bSJed Brown static char help[] = "Example of extracting an array of MPI submatrices from a given MPI matrix.\n"
2*c4762a1bSJed Brown   "This test can only be run in parallel.\n"
3*c4762a1bSJed Brown   "\n";
4*c4762a1bSJed Brown 
5*c4762a1bSJed Brown /*T
6*c4762a1bSJed Brown    Concepts: Mat^mat submatrix, parallel
7*c4762a1bSJed Brown    Processors: n
8*c4762a1bSJed Brown T*/
9*c4762a1bSJed Brown 
10*c4762a1bSJed Brown 
11*c4762a1bSJed Brown #include <petscmat.h>
12*c4762a1bSJed Brown 
13*c4762a1bSJed Brown int main(int argc, char **args)
14*c4762a1bSJed Brown {
15*c4762a1bSJed Brown   Mat             A,*submats;
16*c4762a1bSJed Brown   MPI_Comm        subcomm;
17*c4762a1bSJed Brown   PetscMPIInt     rank,size,subrank,subsize,color;
18*c4762a1bSJed Brown   PetscInt        m,n,N,bs,rstart,rend,i,j,k,total_subdomains,hash,nsubdomains=1;
19*c4762a1bSJed Brown   PetscInt        nis,*cols,gnsubdomains,gsubdomainnums[1],gsubdomainperm[1],s,gs;
20*c4762a1bSJed Brown   PetscInt        *rowindices,*colindices,idx,rep;
21*c4762a1bSJed Brown   PetscScalar     *vals;
22*c4762a1bSJed Brown   IS              rowis[1],colis[1];
23*c4762a1bSJed Brown   PetscViewer     viewer;
24*c4762a1bSJed Brown   PetscBool       permute_indices,flg;
25*c4762a1bSJed Brown   PetscErrorCode  ierr;
26*c4762a1bSJed Brown 
27*c4762a1bSJed Brown   ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
28*c4762a1bSJed Brown   ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
29*c4762a1bSJed Brown   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
30*c4762a1bSJed Brown 
31*c4762a1bSJed Brown   ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"ex183","Mat");CHKERRQ(ierr);
32*c4762a1bSJed Brown   m = 5;
33*c4762a1bSJed Brown   ierr = PetscOptionsInt("-m","Local matrix size","MatSetSizes",m,&m,&flg);CHKERRQ(ierr);
34*c4762a1bSJed Brown   total_subdomains = size-1;
35*c4762a1bSJed Brown   ierr = PetscOptionsInt("-total_subdomains","Number of submatrices where 0 < n < comm size","MatCreateSubMatricesMPI",total_subdomains,&total_subdomains,&flg);CHKERRQ(ierr);
36*c4762a1bSJed Brown   permute_indices = PETSC_FALSE;
37*c4762a1bSJed Brown   ierr = PetscOptionsBool("-permute_indices","Whether to permute indices before breaking them into subdomains","ISCreateGeneral",permute_indices,&permute_indices,&flg);CHKERRQ(ierr);
38*c4762a1bSJed Brown   hash = 7;
39*c4762a1bSJed Brown   ierr = PetscOptionsInt("-hash","Permutation factor, which has to be relatively prime to M = size*m (total matrix size)","ISCreateGeneral",hash,&hash,&flg);CHKERRQ(ierr);
40*c4762a1bSJed Brown   rep = 2;
41*c4762a1bSJed Brown   ierr = PetscOptionsInt("-rep","Number of times to carry out submatrix extractions; currently only 1 & 2 are supported",NULL,rep,&rep,&flg);CHKERRQ(ierr);
42*c4762a1bSJed Brown   ierr = PetscOptionsEnd();CHKERRQ(ierr);
43*c4762a1bSJed Brown 
44*c4762a1bSJed Brown   if (total_subdomains > size) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"Number of subdomains %D must not exceed comm size %D",total_subdomains,size);
45*c4762a1bSJed Brown   if (total_subdomains < 1 || total_subdomains > size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of subdomains must be > 0 and <= %D (comm size), got total_subdomains = %D",size,total_subdomains);
46*c4762a1bSJed Brown   if (rep != 1 && rep != 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid number of test repetitions: %D; must be 1 or 2",rep);
47*c4762a1bSJed Brown 
48*c4762a1bSJed Brown   viewer = PETSC_VIEWER_STDOUT_WORLD;
49*c4762a1bSJed Brown   /* Create logically sparse, but effectively dense matrix for easy verification of submatrix extraction correctness. */
50*c4762a1bSJed Brown   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
51*c4762a1bSJed Brown   ierr = MatSetSizes(A,m,m,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
52*c4762a1bSJed Brown   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
53*c4762a1bSJed Brown   ierr = MatSetUp(A);CHKERRQ(ierr);
54*c4762a1bSJed Brown   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
55*c4762a1bSJed Brown   ierr = MatGetLocalSize(A,NULL,&n);CHKERRQ(ierr);
56*c4762a1bSJed Brown   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
57*c4762a1bSJed Brown   ierr = MatSeqAIJSetPreallocation(A,n,NULL);CHKERRQ(ierr);
58*c4762a1bSJed Brown   ierr = MatMPIAIJSetPreallocation(A,n,NULL,N-n,NULL);CHKERRQ(ierr);
59*c4762a1bSJed Brown   ierr = MatSeqBAIJSetPreallocation(A,bs,n/bs,NULL);CHKERRQ(ierr);
60*c4762a1bSJed Brown   ierr = MatMPIBAIJSetPreallocation(A,bs,n/bs,NULL,(N-n)/bs,NULL);CHKERRQ(ierr);
61*c4762a1bSJed Brown   ierr = MatSeqSBAIJSetPreallocation(A,bs,n/bs,NULL);CHKERRQ(ierr);
62*c4762a1bSJed Brown   ierr = MatMPISBAIJSetPreallocation(A,bs,n/bs,NULL,(N-n)/bs,NULL);CHKERRQ(ierr);
63*c4762a1bSJed Brown 
64*c4762a1bSJed Brown   ierr = PetscMalloc2(N,&cols,N,&vals);CHKERRQ(ierr);
65*c4762a1bSJed Brown   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
66*c4762a1bSJed Brown   for (j = 0; j < N; ++j) cols[j] = j;
67*c4762a1bSJed Brown   for (i=rstart; i<rend; i++) {
68*c4762a1bSJed Brown     for (j=0;j<N;++j) {
69*c4762a1bSJed Brown       vals[j] = i*10000+j;
70*c4762a1bSJed Brown     }
71*c4762a1bSJed Brown     ierr = MatSetValues(A,1,&i,N,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
72*c4762a1bSJed Brown   }
73*c4762a1bSJed Brown   ierr = PetscFree2(cols,vals);CHKERRQ(ierr);
74*c4762a1bSJed Brown   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
75*c4762a1bSJed Brown   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
76*c4762a1bSJed Brown 
77*c4762a1bSJed Brown   ierr = PetscViewerASCIIPrintf(viewer,"Initial matrix:\n");CHKERRQ(ierr);
78*c4762a1bSJed Brown   ierr = MatView(A,viewer);CHKERRQ(ierr);
79*c4762a1bSJed Brown 
80*c4762a1bSJed Brown 
81*c4762a1bSJed Brown   /*
82*c4762a1bSJed Brown      Create subcomms and ISs so that each rank participates in one IS.
83*c4762a1bSJed Brown      The IS either coalesces adjacent rank indices (contiguous),
84*c4762a1bSJed Brown      or selects indices by scrambling them using a hash.
85*c4762a1bSJed Brown   */
86*c4762a1bSJed Brown   k = size/total_subdomains + (size%total_subdomains>0); /* There are up to k ranks to a color */
87*c4762a1bSJed Brown   color = rank/k;
88*c4762a1bSJed Brown   ierr = MPI_Comm_split(PETSC_COMM_WORLD,color,rank,&subcomm);CHKERRQ(ierr);
89*c4762a1bSJed Brown   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
90*c4762a1bSJed Brown   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
91*c4762a1bSJed Brown   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
92*c4762a1bSJed Brown   nis = 1;
93*c4762a1bSJed Brown   ierr = PetscMalloc2(rend-rstart,&rowindices,rend-rstart,&colindices);CHKERRQ(ierr);
94*c4762a1bSJed Brown 
95*c4762a1bSJed Brown   for (j = rstart; j < rend; ++j) {
96*c4762a1bSJed Brown     if (permute_indices) {
97*c4762a1bSJed Brown       idx = (j*hash);
98*c4762a1bSJed Brown     } else {
99*c4762a1bSJed Brown       idx = j;
100*c4762a1bSJed Brown     }
101*c4762a1bSJed Brown     rowindices[j-rstart] = idx%N;
102*c4762a1bSJed Brown     colindices[j-rstart] = (idx+m)%N;
103*c4762a1bSJed Brown   }
104*c4762a1bSJed Brown   ierr = ISCreateGeneral(subcomm,rend-rstart,rowindices,PETSC_COPY_VALUES,&rowis[0]);CHKERRQ(ierr);
105*c4762a1bSJed Brown   ierr = ISCreateGeneral(subcomm,rend-rstart,colindices,PETSC_COPY_VALUES,&colis[0]);CHKERRQ(ierr);
106*c4762a1bSJed Brown   ierr = ISSort(rowis[0]);CHKERRQ(ierr);
107*c4762a1bSJed Brown   ierr = ISSort(colis[0]);CHKERRQ(ierr);
108*c4762a1bSJed Brown   ierr = PetscFree2(rowindices,colindices);CHKERRQ(ierr);
109*c4762a1bSJed Brown   /*
110*c4762a1bSJed Brown     Now view the ISs.  To avoid deadlock when viewing a list of objects on different subcomms,
111*c4762a1bSJed Brown     we need to obtain the global numbers of our local objects and wait for the corresponding global
112*c4762a1bSJed Brown     number to be viewed.
113*c4762a1bSJed Brown   */
114*c4762a1bSJed Brown   ierr = PetscViewerASCIIPrintf(viewer,"Subdomains");CHKERRQ(ierr);
115*c4762a1bSJed Brown   if (permute_indices) {
116*c4762a1bSJed Brown     ierr = PetscViewerASCIIPrintf(viewer," (hash=%D)",hash);CHKERRQ(ierr);
117*c4762a1bSJed Brown   }
118*c4762a1bSJed Brown   ierr = PetscViewerASCIIPrintf(viewer,":\n");CHKERRQ(ierr);
119*c4762a1bSJed Brown   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
120*c4762a1bSJed Brown 
121*c4762a1bSJed Brown   nsubdomains = 1;
122*c4762a1bSJed Brown   for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s;
123*c4762a1bSJed Brown   ierr = PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)rowis,&gnsubdomains,gsubdomainnums);CHKERRQ(ierr);
124*c4762a1bSJed Brown   ierr = PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm);CHKERRQ(ierr);
125*c4762a1bSJed Brown   for (gs=0,s=0; gs < gnsubdomains;++gs) {
126*c4762a1bSJed Brown     if (s < nsubdomains) {
127*c4762a1bSJed Brown       PetscInt ss;
128*c4762a1bSJed Brown       ss = gsubdomainperm[s];
129*c4762a1bSJed Brown       if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */
130*c4762a1bSJed Brown         PetscViewer subviewer = NULL;
131*c4762a1bSJed Brown         ierr = PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)rowis[ss]),&subviewer);CHKERRQ(ierr);
132*c4762a1bSJed Brown         ierr = PetscViewerASCIIPrintf(subviewer,"Row IS %D\n",gs);CHKERRQ(ierr);
133*c4762a1bSJed Brown         ierr = ISView(rowis[ss],subviewer);CHKERRQ(ierr);
134*c4762a1bSJed Brown         ierr = PetscViewerFlush(subviewer);CHKERRQ(ierr);
135*c4762a1bSJed Brown         ierr = PetscViewerASCIIPrintf(subviewer,"Col IS %D\n",gs);CHKERRQ(ierr);
136*c4762a1bSJed Brown         ierr = ISView(colis[ss],subviewer);CHKERRQ(ierr);
137*c4762a1bSJed Brown         ierr = PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)rowis[ss]),&subviewer);CHKERRQ(ierr);
138*c4762a1bSJed Brown         ++s;
139*c4762a1bSJed Brown       }
140*c4762a1bSJed Brown     }
141*c4762a1bSJed Brown     ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRQ(ierr);
142*c4762a1bSJed Brown   }
143*c4762a1bSJed Brown   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
144*c4762a1bSJed Brown   ierr = ISSort(rowis[0]);CHKERRQ(ierr);
145*c4762a1bSJed Brown   ierr = ISSort(colis[0]);CHKERRQ(ierr);
146*c4762a1bSJed Brown   nsubdomains = 1;
147*c4762a1bSJed Brown   ierr = MatCreateSubMatricesMPI(A,nsubdomains,rowis,colis,MAT_INITIAL_MATRIX,&submats);CHKERRQ(ierr);
148*c4762a1bSJed Brown   /*
149*c4762a1bSJed Brown     Now view the matrices.  To avoid deadlock when viewing a list of objects on different subcomms,
150*c4762a1bSJed Brown     we need to obtain the global numbers of our local objects and wait for the corresponding global
151*c4762a1bSJed Brown     number to be viewed.
152*c4762a1bSJed Brown   */
153*c4762a1bSJed Brown   ierr = PetscViewerASCIIPrintf(viewer,"Submatrices (repetition 1):\n");CHKERRQ(ierr);
154*c4762a1bSJed Brown   for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s;
155*c4762a1bSJed Brown   ierr = PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)submats,&gnsubdomains,gsubdomainnums);CHKERRQ(ierr);
156*c4762a1bSJed Brown   ierr = PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm);CHKERRQ(ierr);
157*c4762a1bSJed Brown   for (gs=0,s=0; gs < gnsubdomains;++gs) {
158*c4762a1bSJed Brown     if (s < nsubdomains) {
159*c4762a1bSJed Brown       PetscInt ss;
160*c4762a1bSJed Brown       ss = gsubdomainperm[s];
161*c4762a1bSJed Brown       if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */
162*c4762a1bSJed Brown         PetscViewer subviewer = NULL;
163*c4762a1bSJed Brown         ierr = PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);CHKERRQ(ierr);
164*c4762a1bSJed Brown         ierr = MatView(submats[ss],subviewer);CHKERRQ(ierr);
165*c4762a1bSJed Brown         ierr = PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);CHKERRQ(ierr);
166*c4762a1bSJed Brown         ++s;
167*c4762a1bSJed Brown       }
168*c4762a1bSJed Brown     }
169*c4762a1bSJed Brown     ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRQ(ierr);
170*c4762a1bSJed Brown   }
171*c4762a1bSJed Brown   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
172*c4762a1bSJed Brown   if (rep == 1) goto cleanup;
173*c4762a1bSJed Brown   nsubdomains = 1;
174*c4762a1bSJed Brown   ierr = MatCreateSubMatricesMPI(A,nsubdomains,rowis,colis,MAT_REUSE_MATRIX,&submats);CHKERRQ(ierr);
175*c4762a1bSJed Brown   /*
176*c4762a1bSJed Brown     Now view the matrices.  To avoid deadlock when viewing a list of objects on different subcomms,
177*c4762a1bSJed Brown     we need to obtain the global numbers of our local objects and wait for the corresponding global
178*c4762a1bSJed Brown     number to be viewed.
179*c4762a1bSJed Brown   */
180*c4762a1bSJed Brown   ierr = PetscViewerASCIIPrintf(viewer,"Submatrices (repetition 2):\n");CHKERRQ(ierr);
181*c4762a1bSJed Brown   for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s;
182*c4762a1bSJed Brown   ierr = PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD,1,(PetscObject*)submats,&gnsubdomains,gsubdomainnums);CHKERRQ(ierr);
183*c4762a1bSJed Brown   ierr = PetscSortIntWithPermutation(nsubdomains,gsubdomainnums,gsubdomainperm);CHKERRQ(ierr);
184*c4762a1bSJed Brown   for (gs=0,s=0; gs < gnsubdomains;++gs) {
185*c4762a1bSJed Brown     if (s < nsubdomains) {
186*c4762a1bSJed Brown       PetscInt ss;
187*c4762a1bSJed Brown       ss = gsubdomainperm[s];
188*c4762a1bSJed Brown       if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */
189*c4762a1bSJed Brown         PetscViewer subviewer = NULL;
190*c4762a1bSJed Brown         ierr = PetscViewerGetSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);CHKERRQ(ierr);
191*c4762a1bSJed Brown         ierr = MatView(submats[ss],subviewer);CHKERRQ(ierr);
192*c4762a1bSJed Brown         ierr = PetscViewerRestoreSubViewer(viewer,PetscObjectComm((PetscObject)submats[ss]),&subviewer);CHKERRQ(ierr);
193*c4762a1bSJed Brown         ++s;
194*c4762a1bSJed Brown       }
195*c4762a1bSJed Brown     }
196*c4762a1bSJed Brown     ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRQ(ierr);
197*c4762a1bSJed Brown   }
198*c4762a1bSJed Brown   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
199*c4762a1bSJed Brown   cleanup:
200*c4762a1bSJed Brown   for (k=0;k<nsubdomains;++k) {
201*c4762a1bSJed Brown     ierr = MatDestroy(submats+k);CHKERRQ(ierr);
202*c4762a1bSJed Brown   }
203*c4762a1bSJed Brown   ierr = PetscFree(submats);CHKERRQ(ierr);
204*c4762a1bSJed Brown   for (k=0;k<nis;++k) {
205*c4762a1bSJed Brown     ierr = ISDestroy(rowis+k);CHKERRQ(ierr);
206*c4762a1bSJed Brown     ierr = ISDestroy(colis+k);CHKERRQ(ierr);
207*c4762a1bSJed Brown   }
208*c4762a1bSJed Brown   ierr = MatDestroy(&A);CHKERRQ(ierr);
209*c4762a1bSJed Brown   ierr = MPI_Comm_free(&subcomm);CHKERRQ(ierr);
210*c4762a1bSJed Brown   ierr = PetscFinalize();
211*c4762a1bSJed Brown   return ierr;
212*c4762a1bSJed Brown }
213*c4762a1bSJed Brown 
214*c4762a1bSJed Brown 
215*c4762a1bSJed Brown /*TEST
216*c4762a1bSJed Brown 
217*c4762a1bSJed Brown    test:
218*c4762a1bSJed Brown       nsize: 2
219*c4762a1bSJed Brown       args: -total_subdomains 1
220*c4762a1bSJed Brown       output_file: output/ex183_2_1.out
221*c4762a1bSJed Brown 
222*c4762a1bSJed Brown    test:
223*c4762a1bSJed Brown       suffix: 2
224*c4762a1bSJed Brown       nsize: 3
225*c4762a1bSJed Brown       args: -total_subdomains 2
226*c4762a1bSJed Brown       output_file: output/ex183_3_2.out
227*c4762a1bSJed Brown 
228*c4762a1bSJed Brown    test:
229*c4762a1bSJed Brown       suffix: 3
230*c4762a1bSJed Brown       nsize: 4
231*c4762a1bSJed Brown       args: -total_subdomains 2
232*c4762a1bSJed Brown       output_file: output/ex183_4_2.out
233*c4762a1bSJed Brown 
234*c4762a1bSJed Brown    test:
235*c4762a1bSJed Brown       suffix: 4
236*c4762a1bSJed Brown       nsize: 6
237*c4762a1bSJed Brown       args: -total_subdomains 2
238*c4762a1bSJed Brown       output_file: output/ex183_6_2.out
239*c4762a1bSJed Brown 
240*c4762a1bSJed Brown TEST*/
241