#define PETSCVEC_DLL
/*
     Code for some of the parallel vector primitives.
*/
#include "src/vec/vec/impls/mpi/pvecimpl.h"
#include "src/inline/dot.h"
#include "petscblaslapack.h"

#define do_not_use_ethernet
/*
     Hand-rolled allreduce that chains point-to-point messages up and then
   back down the ranks. Note that it ignores its type and op arguments: it
   always sums MPIU_REAL values.
*/
PetscErrorCode Ethernet_Allreduce(PetscReal *in,PetscReal *out,PetscInt n,MPI_Datatype type,MPI_Op op,MPI_Comm comm)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i;
  MPI_Status     status;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* accumulate partial sums up the chain: each rank adds its contribution
     to the running total received from rank-1 (tag 837) */
  if (rank) {
    ierr = MPI_Recv(out,n,MPIU_REAL,rank-1,837,comm,&status);CHKERRQ(ierr);
    for (i=0; i<n; i++) in[i] += out[i];
  }
  if (rank != size-1) {
    ierr = MPI_Send(in,n,MPIU_REAL,rank+1,837,comm);CHKERRQ(ierr);
  }
  /* the last rank now holds the full sum; propagate it back down (tag 838) */
  if (rank == size-1) {
    for (i=0; i<n; i++) out[i] = in[i];
  } else {
    ierr = MPI_Recv(out,n,MPIU_REAL,rank+1,838,comm,&status);CHKERRQ(ierr);
  }
  if (rank) {
    ierr = MPI_Send(out,n,MPIU_REAL,rank-1,838,comm);CHKERRQ(ierr);
  }
  return 0;
}
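
/*
   A hedged usage sketch (the do_not_use_ethernet define above suggests this
   routine is normally not selected): a sum reduction such as the one in
   VecNorm_MPI() below could call

       ierr = Ethernet_Allreduce(&work,&sum,1,MPIU_REAL,MPI_SUM,xin->comm);

   in place of MPI_Allreduce(). Because the messages are chained through
   every rank, the cost grows linearly with the communicator size.
*/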

#undef __FUNCT__
#define __FUNCT__ "VecMDot_MPI"
/* z[i] = (xin,y[i]): each process forms its local partial dot products,
   then a single MPI_Allreduce() sums them across the communicator */
PetscErrorCode VecMDot_MPI(PetscInt nv,Vec xin,const Vec y[],PetscScalar *z)
{
  PetscScalar    awork[128],*work = awork;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* use the stack buffer when it is large enough; otherwise allocate */
  if (nv > 128) {
    ierr = PetscMalloc(nv*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  }
  ierr = VecMDot_Seq(nv,xin,y,work);CHKERRQ(ierr);
  ierr = MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,xin->comm);CHKERRQ(ierr);
  if (nv > 128) {
    ierr = PetscFree(work);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecMTDot_MPI"
/* transpose (unconjugated) variant; same local-then-allreduce pattern */
PetscErrorCode VecMTDot_MPI(PetscInt nv,Vec xin,const Vec y[],PetscScalar *z)
{
  PetscScalar    awork[128],*work = awork;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (nv > 128) {
    ierr = PetscMalloc(nv*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  }
  ierr = VecMTDot_Seq(nv,xin,y,work);CHKERRQ(ierr);
  ierr = MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,xin->comm);CHKERRQ(ierr);
  if (nv > 128) {
    ierr = PetscFree(work);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecNorm_MPI"
PetscErrorCode VecNorm_MPI(Vec xin,NormType type,PetscReal *z)
{
  Vec_MPI        *x = (Vec_MPI*)xin->data;
  PetscReal      sum,work = 0.0;
  PetscScalar    *xx = x->array;
  PetscErrorCode ierr;
  PetscInt       n = xin->n;

  PetscFunctionBegin;
  if (type == NORM_2 || type == NORM_FROBENIUS) {
#if defined(PETSC_HAVE_SLOW_BLAS_NORM2)
#if defined(PETSC_USE_FORTRAN_KERNEL_NORM)
    fortrannormsqr_(xx,&n,&work);
#elif defined(PETSC_USE_UNROLLED_NORM)
    /* handle the n % 4 leftover entries first; the cases fall through
       intentionally, and the single n -= 4 keeps the loop count right */
    switch (n & 0x3) {
      case 3: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 2: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 1: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++; n -= 4;
    }
    while (n>0) {
      work += PetscRealPart(xx[0]*PetscConj(xx[0])+xx[1]*PetscConj(xx[1])+
                            xx[2]*PetscConj(xx[2])+xx[3]*PetscConj(xx[3]));
      xx += 4; n -= 4;
    }
#else
    {PetscInt i; for (i=0; i<n; i++) work += PetscRealPart(xx[i]*PetscConj(xx[i]));}
#endif
#else
    {PetscBLASInt bn = (PetscBLASInt)n, one = 1;
      work  = BLASnrm2_(&bn,xx,&one);
      work *= work;
    }
#endif
    ierr = MPI_Allreduce(&work,&sum,1,MPIU_REAL,MPI_SUM,xin->comm);CHKERRQ(ierr);
    *z = sqrt(sum);
    ierr = PetscLogFlops(2*xin->n);CHKERRQ(ierr);
  } else if (type == NORM_1) {
    /* Find the local part */
    ierr = VecNorm_Seq(xin,NORM_1,&work);CHKERRQ(ierr);
    /* Find the global sum */
    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_SUM,xin->comm);CHKERRQ(ierr);
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    ierr = VecNorm_Seq(xin,NORM_INFINITY,&work);CHKERRQ(ierr);
    /* Find the global max */
    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MAX,xin->comm);CHKERRQ(ierr);
  } else if (type == NORM_1_AND_2) {
    /* combine the 1-norm and the squared 2-norm into a single reduction */
    PetscReal temp[2];
    ierr = VecNorm_Seq(xin,NORM_1,temp);CHKERRQ(ierr);
    ierr = VecNorm_Seq(xin,NORM_2,temp+1);CHKERRQ(ierr);
    temp[1] = temp[1]*temp[1];
    ierr = MPI_Allreduce(temp,z,2,MPIU_REAL,MPI_SUM,xin->comm);CHKERRQ(ierr);
    z[1] = sqrt(z[1]);
  }
  PetscFunctionReturn(0);
}
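
/*
   A sketch of the identity the parallel 2-norm above relies on, with the
   vector distributed across ranks as x = (x_0,...,x_{p-1}):

       ||x||_2 = sqrt( ||x_0||_2^2 + ... + ||x_{p-1}||_2^2 )

   so each process reduces its local squared norm, and a single MPI_SUM
   allreduce plus one square root finishes the computation.
*/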

/*
       These two functions are the MPI reduction operations used for max and min with index.
   A call to MPI_Op_create() converts the functions Vec[Max,Min]_Local() into the
   MPI operators Vec[Max,Min]_Local_Op.
*/
MPI_Op VecMax_Local_Op = 0;
MPI_Op VecMin_Local_Op = 0;
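
/*
   A minimal registration sketch (hedged; the actual MPI_Op_create() call
   site lives elsewhere in PETSc):

       ierr = MPI_Op_create(VecMax_Local,1,&VecMax_Local_Op);CHKERRQ(ierr);
       ierr = MPI_Op_create(VecMin_Local,1,&VecMin_Local_Op);CHKERRQ(ierr);

   The second argument declares the operation commutative. Each reduction
   operates on a (value,index) pair stored as two PetscReal entries.
*/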

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "VecMax_Local"
void PETSCVEC_DLLEXPORT VecMax_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal*)in,*xout = (PetscReal*)out;

  PetscFunctionBegin;
  if (*datatype != MPIU_REAL) {
    (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  /* entry [0] holds the value, entry [1] the owning global index */
  if (xin[0] > xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscFunctionReturnVoid(); /* cannot return a value */
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "VecMin_Local"
void PETSCVEC_DLLEXPORT VecMin_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal*)in,*xout = (PetscReal*)out;

  PetscFunctionBegin;
  if (*datatype != MPIU_REAL) {
    (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  if (xin[0] < xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "VecMax_MPI"
PetscErrorCode VecMax_MPI(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscErrorCode ierr;
  PetscReal      work;

  PetscFunctionBegin;
  /* Find the local max */
  ierr = VecMax_Seq(xin,idx,&work);CHKERRQ(ierr);

  /* Find the global max */
  if (!idx) {
    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MAX,xin->comm);CHKERRQ(ierr);
  } else {
    PetscReal work2[2],z2[2];
    PetscInt  rstart;

    /* pack the value and the global index into a pair of reals so the
       user-defined VecMax_Local_Op can reduce them together */
    ierr = VecGetOwnershipRange(xin,&rstart,PETSC_NULL);CHKERRQ(ierr);
    work2[0] = work;
    work2[1] = *idx + rstart;
    ierr = MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMax_Local_Op,xin->comm);CHKERRQ(ierr);
    *z   = z2[0];
    *idx = (PetscInt)z2[1];
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecMin_MPI"
PetscErrorCode VecMin_MPI(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscErrorCode ierr;
  PetscReal      work;

  PetscFunctionBegin;
  /* Find the local min */
  ierr = VecMin_Seq(xin,idx,&work);CHKERRQ(ierr);

  /* Find the global min */
  if (!idx) {
    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MIN,xin->comm);CHKERRQ(ierr);
  } else {
    PetscReal work2[2],z2[2];
    PetscInt  rstart;

    ierr = VecGetOwnershipRange(xin,&rstart,PETSC_NULL);CHKERRQ(ierr);
    work2[0] = work;
    work2[1] = *idx + rstart;
    ierr = MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMin_Local_Op,xin->comm);CHKERRQ(ierr);
    *z   = z2[0];
    *idx = (PetscInt)z2[1];
  }
  PetscFunctionReturn(0);
}
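
/*
   A hedged usage sketch: these kernels back the public VecMax()/VecMin()
   interface, for example

       PetscReal max;
       PetscInt  loc;
       ierr = VecMax(x,&loc,&max);CHKERRQ(ierr);

   Passing PETSC_NULL for the index argument selects the cheaper
   single-value MPI_MAX/MPI_MIN reduction above.
*/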