xref: /petsc/src/mat/utils/overlapsplit.c (revision 2452736b626c5516aa3e80c9b2dbe3345798995e)
1*2452736bSFande Kong #include <petscsf.h>
2*2452736bSFande Kong #include <petsc/private/matimpl.h>
3*2452736bSFande Kong 
4*2452736bSFande Kong 
/*
 * Increase the overlap of a sub-matrix across a sub-communicator;
 * the sub-matrix may be a graph or a numerical matrix.
 * */
9*2452736bSFande Kong PetscErrorCode  MatIncreaseOverlapSplit_Single(Mat mat,IS *is,PetscInt ov)
10*2452736bSFande Kong {
11*2452736bSFande Kong   PetscInt         i,nindx,*indices_sc,*indices_ov,localsize,*localsizes_sc,localsize_tmp;
12*2452736bSFande Kong   PetscInt         *indices_ov_rd,nroots,nleaves,*localoffsets,*indices_recv,*sources_sc,*sources_sc_rd;
13*2452736bSFande Kong   const PetscInt   *indices;
14*2452736bSFande Kong   PetscMPIInt      srank,ssize,issamecomm,k;
15*2452736bSFande Kong   IS               is_sc,allis_sc,partitioning;
16*2452736bSFande Kong   MPI_Comm         gcomm,scomm;
17*2452736bSFande Kong   PetscSF          sf;
18*2452736bSFande Kong   PetscSFNode      *remote;
19*2452736bSFande Kong   Mat              *smat;
20*2452736bSFande Kong   MatPartitioning  part;
21*2452736bSFande Kong   PetscErrorCode   ierr;
22*2452736bSFande Kong 
23*2452736bSFande Kong   PetscFunctionBegin;
24*2452736bSFande Kong   /*get a sub communicator before call individual MatIncreaseOverlap
25*2452736bSFande Kong    * since the sub communicator may be changed.
26*2452736bSFande Kong    * */
27*2452736bSFande Kong   ierr = PetscObjectGetComm((PetscObject)*is,&scomm);CHKERRQ(ierr);
28*2452736bSFande Kong   /*increase overlap on each individual subdomain*/
29*2452736bSFande Kong   ierr = (*mat->ops->increaseoverlap)(mat,1,is,ov);CHKERRQ(ierr);
30*2452736bSFande Kong   /*get a global communicator  */
31*2452736bSFande Kong   ierr = PetscObjectGetComm((PetscObject)mat,&gcomm);CHKERRQ(ierr);
32*2452736bSFande Kong   /*compare communicators */
33*2452736bSFande Kong   ierr = MPI_Comm_compare(gcomm,scomm,&issamecomm);CHKERRQ(ierr);
34*2452736bSFande Kong   /* if the sub-communicator is the same as the global communicator,
35*2452736bSFande Kong    * user does not want to use a sub-communicator
36*2452736bSFande Kong    * */
37*2452736bSFande Kong   if(issamecomm == MPI_IDENT) PetscFunctionReturn(0);
38*2452736bSFande Kong   /* if the sub-communicator is petsc_comm_self,
39*2452736bSFande Kong    * user also does not care the sub-communicator
40*2452736bSFande Kong    * */
41*2452736bSFande Kong   ierr = MPI_Comm_compare(scomm,PETSC_COMM_SELF,&issamecomm);CHKERRQ(ierr);
42*2452736bSFande Kong   if(issamecomm == MPI_IDENT) PetscFunctionReturn(0);
43*2452736bSFande Kong   /*local rank, size in a subcomm */
44*2452736bSFande Kong   ierr = MPI_Comm_rank(scomm,&srank);CHKERRQ(ierr);
45*2452736bSFande Kong   ierr = MPI_Comm_size(scomm,&ssize);CHKERRQ(ierr);
46*2452736bSFande Kong   /*create a new IS based on subcomm
47*2452736bSFande Kong    * since the old IS is often petsc_comm_self
48*2452736bSFande Kong    * */
49*2452736bSFande Kong   ierr = ISGetLocalSize(*is,&nindx);CHKERRQ(ierr);
50*2452736bSFande Kong   ierr = PetscCalloc1(nindx,&indices_sc);CHKERRQ(ierr);
51*2452736bSFande Kong   ierr = ISGetIndices(*is,&indices);CHKERRQ(ierr);
52*2452736bSFande Kong   ierr = PetscMemcpy(indices_sc,indices,sizeof(PetscInt)*nindx);CHKERRQ(ierr);
53*2452736bSFande Kong   ierr = ISRestoreIndices(*is,&indices);CHKERRQ(ierr);
54*2452736bSFande Kong   /*we do not need any more*/
55*2452736bSFande Kong   ierr = ISDestroy(is);CHKERRQ(ierr);
56*2452736bSFande Kong   /*create a index set based on the sub communicator  */
57*2452736bSFande Kong   ierr = ISCreateGeneral(scomm,nindx,indices_sc,PETSC_OWN_POINTER,&is_sc);CHKERRQ(ierr);
58*2452736bSFande Kong   /*gather all indices within  the sub communicator*/
59*2452736bSFande Kong   ierr = ISAllGather(is_sc,&allis_sc);CHKERRQ(ierr);
60*2452736bSFande Kong   ierr = ISDestroy(&is_sc);CHKERRQ(ierr);
61*2452736bSFande Kong   /* gather local sizes */
62*2452736bSFande Kong   ierr = PetscMalloc1(ssize,&localsizes_sc);CHKERRQ(ierr);
63*2452736bSFande Kong   ierr = MPI_Gather(&nindx,1,MPIU_INT,localsizes_sc,1,MPIU_INT,0,scomm);CHKERRQ(ierr);
64*2452736bSFande Kong   /*only root does these computation */
65*2452736bSFande Kong   if(!srank){
66*2452736bSFande Kong    /*get local size for the big index set*/
67*2452736bSFande Kong    ierr = ISGetLocalSize(allis_sc,&localsize);CHKERRQ(ierr);
68*2452736bSFande Kong    ierr = PetscCalloc2(localsize,&indices_ov,localsize,&sources_sc);CHKERRQ(ierr);
69*2452736bSFande Kong    ierr = PetscCalloc2(localsize,&indices_ov_rd,localsize,&sources_sc_rd);CHKERRQ(ierr);
70*2452736bSFande Kong    ierr = ISGetIndices(allis_sc,&indices);CHKERRQ(ierr);
71*2452736bSFande Kong    ierr = PetscMemcpy(indices_ov,indices,sizeof(PetscInt)*localsize);CHKERRQ(ierr);
72*2452736bSFande Kong    ierr = ISRestoreIndices(allis_sc,&indices);CHKERRQ(ierr);
73*2452736bSFande Kong 
74*2452736bSFande Kong    ierr = ISDestroy(&allis_sc);CHKERRQ(ierr);
75*2452736bSFande Kong    /*assign corresponding sources */
76*2452736bSFande Kong    localsize_tmp = 0;
77*2452736bSFande Kong    for(k=0; k<ssize; k++){
78*2452736bSFande Kong      for(i=0; i<localsizes_sc[k]; i++){
79*2452736bSFande Kong        sources_sc[localsize_tmp++] = k;
80*2452736bSFande Kong      }
81*2452736bSFande Kong    }
82*2452736bSFande Kong    /*record where indices come from */
83*2452736bSFande Kong    ierr = PetscSortIntWithArray(localsize,indices_ov,sources_sc);CHKERRQ(ierr);
84*2452736bSFande Kong    localsize_tmp = 1;
85*2452736bSFande Kong    ierr = PetscMemzero(localsizes_sc,sizeof(PetscInt)*ssize);CHKERRQ(ierr);
86*2452736bSFande Kong    /*initialize the first entities*/
87*2452736bSFande Kong    if(localsize){
88*2452736bSFande Kong 	 indices_ov_rd[0] = indices_ov[0];
89*2452736bSFande Kong 	 sources_sc_rd[0] = sources_sc[0];
90*2452736bSFande Kong 	 localsizes_sc[sources_sc[0]]++;
91*2452736bSFande Kong    }
92*2452736bSFande Kong    /*remove duplicate integers */
93*2452736bSFande Kong    for(i=1; i<localsize; i++){
94*2452736bSFande Kong 	 if(indices_ov[i] != indices_ov[i-1]){
95*2452736bSFande Kong 	   indices_ov_rd[localsize_tmp]   = indices_ov[i];
96*2452736bSFande Kong 	   sources_sc_rd[localsize_tmp++] = sources_sc[i];
97*2452736bSFande Kong 	   localsizes_sc[sources_sc[i]]++;
98*2452736bSFande Kong 	 }
99*2452736bSFande Kong    }
100*2452736bSFande Kong    ierr = PetscFree2(indices_ov,sources_sc);CHKERRQ(ierr);
101*2452736bSFande Kong    ierr = PetscCalloc1(ssize+1,&localoffsets);CHKERRQ(ierr);
102*2452736bSFande Kong    for(k=0; k<ssize; k++){
103*2452736bSFande Kong 	 localoffsets[k+1] = localoffsets[k] + localsizes_sc[i];
104*2452736bSFande Kong    }
105*2452736bSFande Kong    /*build a star forest to send data back */
106*2452736bSFande Kong    nleaves = localoffsets[ssize];
107*2452736bSFande Kong    ierr = PetscMemzero(localoffsets,(ssize+1)*sizeof(PetscInt));CHKERRQ(ierr);
108*2452736bSFande Kong    nroots  = localsizes_sc[srank];
109*2452736bSFande Kong    ierr = PetscCalloc1(nleaves,&remote);CHKERRQ(ierr);
110*2452736bSFande Kong    for(i=0; i<nleaves; i++){
111*2452736bSFande Kong 	 remote[i].rank  = sources_sc[i];
112*2452736bSFande Kong 	 remote[i].index = localoffsets[sources_sc[i]]++;
113*2452736bSFande Kong    }
114*2452736bSFande Kong   }else{
115*2452736bSFande Kong    ierr = ISDestroy(&allis_sc);CHKERRQ(ierr);
116*2452736bSFande Kong    nleaves = 0;
117*2452736bSFande Kong    indices_ov_rd = 0;
118*2452736bSFande Kong    sources_sc_rd = 0;
119*2452736bSFande Kong   }
120*2452736bSFande Kong   /*scatter sizes to everybody */
121*2452736bSFande Kong   ierr = MPI_Scatter(localsizes_sc,1, MPIU_INT,&nroots,1, MPIU_INT,0,scomm);CHKERRQ(ierr);
122*2452736bSFande Kong   ierr = PetscCalloc1(nroots,&indices_recv);CHKERRQ(ierr);
123*2452736bSFande Kong   /*set data back to every body */
124*2452736bSFande Kong   ierr = PetscSFCreate(scomm,&sf);CHKERRQ(ierr);
125*2452736bSFande Kong   ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);
126*2452736bSFande Kong   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
127*2452736bSFande Kong   ierr = PetscSFSetGraph(sf,nroots,nleaves,PETSC_NULL,PETSC_OWN_POINTER,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
128*2452736bSFande Kong   ierr = PetscSFReduceBegin(sf,MPIU_INT,&indices_ov_rd,indices_recv,MPI_REPLACE);CHKERRQ(ierr);
129*2452736bSFande Kong   ierr = PetscSFReduceEnd(sf,MPIU_INT,&indices_ov_rd,indices_recv,MPI_REPLACE);CHKERRQ(ierr);
130*2452736bSFande Kong   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
131*2452736bSFande Kong   /* free memory */
132*2452736bSFande Kong   ierr = PetscFree2(indices_ov_rd,sources_sc_rd);CHKERRQ(ierr);
133*2452736bSFande Kong   /*create a index set*/
134*2452736bSFande Kong   ierr = ISCreateGeneral(scomm,nroots,indices_recv,PETSC_OWN_POINTER,&is_sc);CHKERRQ(ierr);
135*2452736bSFande Kong   /*create a index set for cols */
136*2452736bSFande Kong   ierr = ISAllGather(is_sc,&allis_sc);CHKERRQ(ierr);
137*2452736bSFande Kong   /*reparition */
138*2452736bSFande Kong   /*construct a parallel submatrix */
139*2452736bSFande Kong   ierr = PetscCalloc1(1,&smat);CHKERRQ(ierr);
140*2452736bSFande Kong   ierr = MatGetSubMatricesMPI(mat,1,&is_sc,&allis_sc,MAT_INITIAL_MATRIX,&smat);CHKERRQ(ierr);
141*2452736bSFande Kong   /* we do not need them any more */
142*2452736bSFande Kong   ierr = ISDestroy(&is_sc);CHKERRQ(ierr);
143*2452736bSFande Kong   ierr = ISDestroy(&allis_sc);CHKERRQ(ierr);
144*2452736bSFande Kong   /*create a partitioner to repartition the sub-matrix*/
145*2452736bSFande Kong   ierr = MatPartitioningCreate(scomm,&part);CHKERRQ(ierr);
146*2452736bSFande Kong   ierr = MatPartitioningSetAdjacency(part,smat[0]);CHKERRQ(ierr);
147*2452736bSFande Kong #if PETSC_HAVE_PARMETIS
148*2452736bSFande Kong   /* if there exists a ParMETIS installation, we try to use ParMETIS
149*2452736bSFande Kong    * because a repartition routine possibly work better
150*2452736bSFande Kong    * */
151*2452736bSFande Kong   ierr = MatPartitioningSetType(part,MATPARTITIONINGPARMETIS);CHKERRQ(ierr);
152*2452736bSFande Kong   /*try to use reparition function, instead of partition function */
153*2452736bSFande Kong   ierr = MatPartitioningParmetisSetRepartition(part);CHKERRQ(ierr);
154*2452736bSFande Kong #else
155*2452736bSFande Kong   /*we at least provide a default partitioner to rebalance the computation  */
156*2452736bSFande Kong   ierr = MatPartitioningSetType(part,MATPARTITIONINGAVERAGE);CHKERRQ(ierr);
157*2452736bSFande Kong #endif
158*2452736bSFande Kong   /*user can pick up any partitioner by using an option*/
159*2452736bSFande Kong   ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
160*2452736bSFande Kong   /* apply partition */
161*2452736bSFande Kong   ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr);
162*2452736bSFande Kong   ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
163*2452736bSFande Kong   ierr = PetscFree(smat);CHKERRQ(ierr);
164*2452736bSFande Kong   /* get local rows including  overlap */
165*2452736bSFande Kong   ierr = ISBuildTwoSided(partitioning,is);CHKERRQ(ierr);
166*2452736bSFande Kong   PetscFunctionReturn(0);
167*2452736bSFande Kong }
168*2452736bSFande Kong 
169*2452736bSFande Kong 
170