xref: /petsc/src/sys/utils/mpishm.c (revision 5f7487a04e848b20a902d7fc7292f05f55508063)
1*5f7487a0SJunchao Zhang #include <petscsys.h>        /*I  "petscsys.h"  I*/
2*5f7487a0SJunchao Zhang #include <petsc/private/petscimpl.h>
3*5f7487a0SJunchao Zhang 
4*5f7487a0SJunchao Zhang struct _n_PetscShmComm {
5*5f7487a0SJunchao Zhang   PetscMPIInt *globranks;       /* global ranks of each rank in the shared memory communicator */
6*5f7487a0SJunchao Zhang   PetscMPIInt shmsize;          /* size of the shared memory communicator */
7*5f7487a0SJunchao Zhang   MPI_Comm    globcomm,shmcomm; /* global communicator and shared memory communicator (a sub-communicator of the former) */
8*5f7487a0SJunchao Zhang };
9*5f7487a0SJunchao Zhang 
10*5f7487a0SJunchao Zhang /*
11*5f7487a0SJunchao Zhang    Private routine to delete internal tag/name shared memory communicator when a communicator is freed.
12*5f7487a0SJunchao Zhang 
13*5f7487a0SJunchao Zhang    This is called by MPI, not by users. This is called by MPI_Comm_free() when the communicator that has this  data as an attribute is freed.
14*5f7487a0SJunchao Zhang 
15*5f7487a0SJunchao Zhang    Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
16*5f7487a0SJunchao Zhang 
17*5f7487a0SJunchao Zhang */
18*5f7487a0SJunchao Zhang PETSC_EXTERN PetscMPIInt MPIAPI Petsc_DelComm_Shm(MPI_Comm comm,PetscMPIInt keyval,void *val,void *extra_state)
19*5f7487a0SJunchao Zhang {
20*5f7487a0SJunchao Zhang   PetscErrorCode  ierr;
21*5f7487a0SJunchao Zhang   PetscShmComm p = (PetscShmComm)val;
22*5f7487a0SJunchao Zhang 
23*5f7487a0SJunchao Zhang   PetscFunctionBegin;
24*5f7487a0SJunchao Zhang   ierr = PetscInfo1(0,"Deleting shared memory subcommunicator in a MPI_Comm %ld\n",(long)comm);CHKERRMPI(ierr);
25*5f7487a0SJunchao Zhang   ierr = MPI_Comm_free(&p->shmcomm);CHKERRMPI(ierr);
26*5f7487a0SJunchao Zhang   ierr = PetscFree(p->globranks);CHKERRMPI(ierr);
27*5f7487a0SJunchao Zhang   ierr = PetscFree(val);CHKERRMPI(ierr);
28*5f7487a0SJunchao Zhang   PetscFunctionReturn(MPI_SUCCESS);
29*5f7487a0SJunchao Zhang }
30*5f7487a0SJunchao Zhang 
31*5f7487a0SJunchao Zhang /*@C
32*5f7487a0SJunchao Zhang     PetscShmCommGet - Given a PETSc communicator returns a communicator of all ranks that share a common memory
33*5f7487a0SJunchao Zhang 
34*5f7487a0SJunchao Zhang 
35*5f7487a0SJunchao Zhang     Collective on comm.
36*5f7487a0SJunchao Zhang 
37*5f7487a0SJunchao Zhang     Input Parameter:
38*5f7487a0SJunchao Zhang .   globcomm - MPI_Comm
39*5f7487a0SJunchao Zhang 
40*5f7487a0SJunchao Zhang     Output Parameter:
41*5f7487a0SJunchao Zhang .   pshmcomm - the PETSc shared memory communicator object
42*5f7487a0SJunchao Zhang 
43*5f7487a0SJunchao Zhang     Level: developer
44*5f7487a0SJunchao Zhang 
45*5f7487a0SJunchao Zhang     Notes:
46*5f7487a0SJunchao Zhang     This should be called only with an PetscCommDuplicate() communictor
47*5f7487a0SJunchao Zhang 
48*5f7487a0SJunchao Zhang            When used with MPICH, MPICH must be configured with --download-mpich-device=ch3:nemesis
49*5f7487a0SJunchao Zhang 
50*5f7487a0SJunchao Zhang     Concepts: MPI subcomm^numbering
51*5f7487a0SJunchao Zhang 
52*5f7487a0SJunchao Zhang @*/
53*5f7487a0SJunchao Zhang PetscErrorCode PetscShmCommGet(MPI_Comm globcomm,PetscShmComm *pshmcomm)
54*5f7487a0SJunchao Zhang {
55*5f7487a0SJunchao Zhang #ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
56*5f7487a0SJunchao Zhang   PetscErrorCode   ierr;
57*5f7487a0SJunchao Zhang   MPI_Group        globgroup,shmgroup;
58*5f7487a0SJunchao Zhang   PetscMPIInt      *shmranks,i,flg;
59*5f7487a0SJunchao Zhang   PetscCommCounter *counter;
60*5f7487a0SJunchao Zhang 
61*5f7487a0SJunchao Zhang   PetscFunctionBegin;
62*5f7487a0SJunchao Zhang   ierr = MPI_Comm_get_attr(globcomm,Petsc_Counter_keyval,&counter,&flg);CHKERRQ(ierr);
63*5f7487a0SJunchao Zhang   if (!flg) SETERRQ(globcomm,PETSC_ERR_ARG_CORRUPT,"Bad MPI communicator supplied; must be a PETSc communicator");
64*5f7487a0SJunchao Zhang 
65*5f7487a0SJunchao Zhang   ierr = MPI_Comm_get_attr(globcomm,Petsc_ShmComm_keyval,pshmcomm,&flg);CHKERRQ(ierr);
66*5f7487a0SJunchao Zhang   if (flg) PetscFunctionReturn(0);
67*5f7487a0SJunchao Zhang 
68*5f7487a0SJunchao Zhang   ierr        = PetscNew(pshmcomm);CHKERRQ(ierr);
69*5f7487a0SJunchao Zhang   (*pshmcomm)->globcomm = globcomm;
70*5f7487a0SJunchao Zhang 
71*5f7487a0SJunchao Zhang   ierr = MPI_Comm_split_type(globcomm, MPI_COMM_TYPE_SHARED,0, MPI_INFO_NULL,&(*pshmcomm)->shmcomm);CHKERRQ(ierr);
72*5f7487a0SJunchao Zhang 
73*5f7487a0SJunchao Zhang   ierr = MPI_Comm_size((*pshmcomm)->shmcomm,&(*pshmcomm)->shmsize);CHKERRQ(ierr);
74*5f7487a0SJunchao Zhang   ierr = MPI_Comm_group(globcomm, &globgroup);CHKERRQ(ierr);
75*5f7487a0SJunchao Zhang   ierr = MPI_Comm_group((*pshmcomm)->shmcomm, &shmgroup);CHKERRQ(ierr);
76*5f7487a0SJunchao Zhang   ierr = PetscMalloc1((*pshmcomm)->shmsize,&shmranks);CHKERRQ(ierr);
77*5f7487a0SJunchao Zhang   ierr = PetscMalloc1((*pshmcomm)->shmsize,&(*pshmcomm)->globranks);CHKERRQ(ierr);
78*5f7487a0SJunchao Zhang   for (i=0; i<(*pshmcomm)->shmsize; i++) shmranks[i] = i;
79*5f7487a0SJunchao Zhang   ierr = MPI_Group_translate_ranks(shmgroup, (*pshmcomm)->shmsize, shmranks, globgroup, (*pshmcomm)->globranks);CHKERRQ(ierr);
80*5f7487a0SJunchao Zhang   ierr = PetscFree(shmranks);CHKERRQ(ierr);
81*5f7487a0SJunchao Zhang   ierr = MPI_Group_free(&globgroup);CHKERRQ(ierr);
82*5f7487a0SJunchao Zhang   ierr = MPI_Group_free(&shmgroup);CHKERRQ(ierr);
83*5f7487a0SJunchao Zhang 
84*5f7487a0SJunchao Zhang   for (i=0; i<(*pshmcomm)->shmsize; i++) {
85*5f7487a0SJunchao Zhang     ierr = PetscInfo2(NULL,"Shared memory rank %d global rank %d\n",i,(*pshmcomm)->globranks[i]);CHKERRQ(ierr);
86*5f7487a0SJunchao Zhang   }
87*5f7487a0SJunchao Zhang   ierr = MPI_Comm_set_attr(globcomm,Petsc_ShmComm_keyval,*pshmcomm);CHKERRQ(ierr);
88*5f7487a0SJunchao Zhang   PetscFunctionReturn(0);
89*5f7487a0SJunchao Zhang #else
90*5f7487a0SJunchao Zhang   SETERRQ(globcomm, PETSC_ERR_SUP, "Shared memory communicators need MPI-3 package support.\nPlease upgrade your MPI or reconfigure with --download-mpich.");
91*5f7487a0SJunchao Zhang #endif
92*5f7487a0SJunchao Zhang }
93*5f7487a0SJunchao Zhang 
94*5f7487a0SJunchao Zhang /*@C
95*5f7487a0SJunchao Zhang     PetscShmCommGlobalToLocal - Given a global rank returns the local rank in the shared memory communicator
96*5f7487a0SJunchao Zhang 
97*5f7487a0SJunchao Zhang     Input Parameters:
98*5f7487a0SJunchao Zhang +   pshmcomm - the shared memory communicator object
99*5f7487a0SJunchao Zhang -   grank    - the global rank
100*5f7487a0SJunchao Zhang 
101*5f7487a0SJunchao Zhang     Output Parameter:
102*5f7487a0SJunchao Zhang .   lrank - the local rank, or MPI_PROC_NULL if it does not exist
103*5f7487a0SJunchao Zhang 
104*5f7487a0SJunchao Zhang     Level: developer
105*5f7487a0SJunchao Zhang 
106*5f7487a0SJunchao Zhang     Developer Notes:
107*5f7487a0SJunchao Zhang     Assumes the pshmcomm->globranks[] is sorted
108*5f7487a0SJunchao Zhang 
109*5f7487a0SJunchao Zhang     It may be better to rewrite this to map multiple global ranks to local in the same function call
110*5f7487a0SJunchao Zhang 
111*5f7487a0SJunchao Zhang     Concepts: MPI subcomm^numbering
112*5f7487a0SJunchao Zhang 
113*5f7487a0SJunchao Zhang @*/
114*5f7487a0SJunchao Zhang PetscErrorCode PetscShmCommGlobalToLocal(PetscShmComm pshmcomm,PetscMPIInt grank,PetscMPIInt *lrank)
115*5f7487a0SJunchao Zhang {
116*5f7487a0SJunchao Zhang   PetscMPIInt    low,high,t,i;
117*5f7487a0SJunchao Zhang   PetscBool      flg = PETSC_FALSE;
118*5f7487a0SJunchao Zhang   PetscErrorCode ierr;
119*5f7487a0SJunchao Zhang 
120*5f7487a0SJunchao Zhang   PetscFunctionBegin;
121*5f7487a0SJunchao Zhang   *lrank = MPI_PROC_NULL;
122*5f7487a0SJunchao Zhang   if (grank < pshmcomm->globranks[0]) PetscFunctionReturn(0);
123*5f7487a0SJunchao Zhang   if (grank > pshmcomm->globranks[pshmcomm->shmsize-1]) PetscFunctionReturn(0);
124*5f7487a0SJunchao Zhang   ierr = PetscOptionsGetBool(NULL,NULL,"-noshared",&flg,NULL);CHKERRQ(ierr);
125*5f7487a0SJunchao Zhang   if (flg) PetscFunctionReturn(0);
126*5f7487a0SJunchao Zhang   low  = 0;
127*5f7487a0SJunchao Zhang   high = pshmcomm->shmsize;
128*5f7487a0SJunchao Zhang   while (high-low > 5) {
129*5f7487a0SJunchao Zhang     t = (low+high)/2;
130*5f7487a0SJunchao Zhang     if (pshmcomm->globranks[t] > grank) high = t;
131*5f7487a0SJunchao Zhang     else low = t;
132*5f7487a0SJunchao Zhang   }
133*5f7487a0SJunchao Zhang   for (i=low; i<high; i++) {
134*5f7487a0SJunchao Zhang     if (pshmcomm->globranks[i] > grank) PetscFunctionReturn(0);
135*5f7487a0SJunchao Zhang     if (pshmcomm->globranks[i] == grank) {
136*5f7487a0SJunchao Zhang       *lrank = i;
137*5f7487a0SJunchao Zhang       PetscFunctionReturn(0);
138*5f7487a0SJunchao Zhang     }
139*5f7487a0SJunchao Zhang   }
140*5f7487a0SJunchao Zhang   PetscFunctionReturn(0);
141*5f7487a0SJunchao Zhang }
142*5f7487a0SJunchao Zhang 
143*5f7487a0SJunchao Zhang /*@C
144*5f7487a0SJunchao Zhang     PetscShmCommLocalToGlobal - Given a local rank in the shared memory communicator returns the global rank
145*5f7487a0SJunchao Zhang 
146*5f7487a0SJunchao Zhang     Input Parameters:
147*5f7487a0SJunchao Zhang +   pshmcomm - the shared memory communicator object
148*5f7487a0SJunchao Zhang -   lrank    - the local rank in the shared memory communicator
149*5f7487a0SJunchao Zhang 
150*5f7487a0SJunchao Zhang     Output Parameter:
151*5f7487a0SJunchao Zhang .   grank - the global rank in the global communicator where the shared memory communicator is built
152*5f7487a0SJunchao Zhang 
153*5f7487a0SJunchao Zhang     Level: developer
154*5f7487a0SJunchao Zhang 
155*5f7487a0SJunchao Zhang     Concepts: MPI subcomm^numbering
156*5f7487a0SJunchao Zhang @*/
157*5f7487a0SJunchao Zhang PetscErrorCode PetscShmCommLocalToGlobal(PetscShmComm pshmcomm,PetscMPIInt lrank,PetscMPIInt *grank)
158*5f7487a0SJunchao Zhang {
159*5f7487a0SJunchao Zhang   PetscFunctionBegin;
160*5f7487a0SJunchao Zhang #ifdef PETSC_USE_DEBUG
161*5f7487a0SJunchao Zhang   {
162*5f7487a0SJunchao Zhang     PetscErrorCode ierr;
163*5f7487a0SJunchao Zhang     if (lrank < 0 || lrank >= pshmcomm->shmsize) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"No rank %D in the shared memory communicator",lrank);CHKERRQ(ierr); }
164*5f7487a0SJunchao Zhang   }
165*5f7487a0SJunchao Zhang #endif
166*5f7487a0SJunchao Zhang   *grank = pshmcomm->globranks[lrank];
167*5f7487a0SJunchao Zhang   PetscFunctionReturn(0);
168*5f7487a0SJunchao Zhang }
169*5f7487a0SJunchao Zhang 
170*5f7487a0SJunchao Zhang /*@C
171*5f7487a0SJunchao Zhang     PetscShmCommGetMpiShmComm - Returns the MPI communicator that represents all processes with common shared memory
172*5f7487a0SJunchao Zhang 
173*5f7487a0SJunchao Zhang     Input Parameter:
174*5f7487a0SJunchao Zhang .   pshmcomm - PetscShmComm object obtained with PetscShmCommGet()
175*5f7487a0SJunchao Zhang 
176*5f7487a0SJunchao Zhang     Output Parameter:
177*5f7487a0SJunchao Zhang .   comm     - the MPI communicator
178*5f7487a0SJunchao Zhang 
179*5f7487a0SJunchao Zhang     Level: developer
180*5f7487a0SJunchao Zhang 
181*5f7487a0SJunchao Zhang @*/
182*5f7487a0SJunchao Zhang PetscErrorCode PetscShmCommGetMpiShmComm(PetscShmComm pshmcomm,MPI_Comm *comm)
183*5f7487a0SJunchao Zhang {
184*5f7487a0SJunchao Zhang   PetscFunctionBegin;
185*5f7487a0SJunchao Zhang   *comm = pshmcomm->shmcomm;
186*5f7487a0SJunchao Zhang   PetscFunctionReturn(0);
187*5f7487a0SJunchao Zhang }
188*5f7487a0SJunchao Zhang 
189