#define PETSCVEC_DLL
/*
   Code for some of the parallel vector primitives.
*/
#include "src/vec/vec/impls/mpi/pvecimpl.h"
#include "src/inline/dot.h"
#include "petscblaslapack.h"

#define do_not_use_ethernet
/*
   Ethernet_Allreduce - Simple chain (linear) allreduce for real summation: partial
   sums travel up the chain of processes with message tag 837, and the completed
   result travels back down with tag 838.  The type and op arguments are accepted
   but ignored; only MPIU_REAL summation is performed.  (A hedged usage note appears
   at the end of this file.)
*/
PetscErrorCode Ethernet_Allreduce(PetscReal *in,PetscReal *out,PetscInt n,MPI_Datatype type,MPI_Op op,MPI_Comm comm)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i;
  MPI_Status     status;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  if (rank) { /* receive the partial sums from the previous process and add in our values */
    ierr = MPI_Recv(out,n,MPIU_REAL,rank-1,837,comm,&status);CHKERRQ(ierr);
    for (i=0; i<n; i++) in[i] += out[i];
  }
  if (rank != size-1) { /* pass the partial sums on to the next process */
    ierr = MPI_Send(in,n,MPIU_REAL,rank+1,837,comm);CHKERRQ(ierr);
  }
  if (rank == size-1) { /* the last process now holds the complete sums */
    for (i=0; i<n; i++) out[i] = in[i];
  } else {              /* receive the completed result coming back down the chain */
    ierr = MPI_Recv(out,n,MPIU_REAL,rank+1,838,comm,&status);CHKERRQ(ierr);
  }
  if (rank) {
    ierr = MPI_Send(out,n,MPIU_REAL,rank-1,838,comm);CHKERRQ(ierr);
  }
  return 0;
}

#undef __FUNCT__
#define __FUNCT__ "VecMDot_MPI"
PetscErrorCode VecMDot_MPI(PetscInt nv,Vec xin,const Vec y[],PetscScalar *z)
{
  PetscScalar    awork[128],*work = awork;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (nv > 128) {
    ierr = PetscMalloc(nv*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  }
  /* compute the local dot products, then sum them across all processes */
  ierr = VecMDot_Seq(nv,xin,y,work);CHKERRQ(ierr);
  ierr = MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,xin->comm);CHKERRQ(ierr);
  if (nv > 128) {
    ierr = PetscFree(work);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecMTDot_MPI"
PetscErrorCode VecMTDot_MPI(PetscInt nv,Vec xin,const Vec y[],PetscScalar *z)
{
  PetscScalar    awork[128],*work = awork;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (nv > 128) {
    ierr = PetscMalloc(nv*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  }
  /* compute the local results, then sum them across all processes */
  ierr = VecMTDot_Seq(nv,xin,y,work);CHKERRQ(ierr);
  ierr = MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,xin->comm);CHKERRQ(ierr);
  if (nv > 128) {
    ierr = PetscFree(work);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecNorm_MPI"
PetscErrorCode VecNorm_MPI(Vec xin,NormType type,PetscReal *z)
{
  Vec_MPI        *x = (Vec_MPI*)xin->data;
  PetscReal      sum,work = 0.0;
  PetscScalar    *xx = x->array;
  PetscErrorCode ierr;
  PetscInt       n = xin->n;

  PetscFunctionBegin;
  if (type == NORM_2 || type == NORM_FROBENIUS) {
#if defined(PETSC_HAVE_SLOW_BLAS_NORM2)
#if defined(PETSC_USE_FORTRAN_KERNEL_NORM)
    fortrannormsqr_(xx,&n,&work);
#elif defined(PETSC_USE_UNROLLED_NORM)
    switch (n & 0x3) {
      case 3: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 2: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 1: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++; n -= 4;
    }
    while (n>0) {
      work += PetscRealPart(xx[0]*PetscConj(xx[0])+xx[1]*PetscConj(xx[1])+
                            xx[2]*PetscConj(xx[2])+xx[3]*PetscConj(xx[3]));
      xx += 4; n -= 4;
    }
#else
    {PetscInt i; for (i=0; i<n; i++) work += PetscRealPart((xx[i])*(PetscConj(xx[i])));}
#endif
#else
    {PetscBLASInt bn = (PetscBLASInt)n, one = 1;
     work  = BLASnrm2_(&bn,xx,&one);
     work *= work;
    }
#endif
    /* sum the local squared norms, then take the square root of the total */
    ierr = MPI_Allreduce(&work,&sum,1,MPIU_REAL,MPI_SUM,xin->comm);CHKERRQ(ierr);
    *z   = sqrt(sum);
    ierr = PetscLogFlops(2*xin->n);CHKERRQ(ierr);
  } else if (type == NORM_1) {
    /* Find the local part */
    ierr = VecNorm_Seq(xin,NORM_1,&work);CHKERRQ(ierr);
    /* Find the global sum */
    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_SUM,xin->comm);CHKERRQ(ierr);
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    ierr = VecNorm_Seq(xin,NORM_INFINITY,&work);CHKERRQ(ierr);
    /* Find the global max */
    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MAX,xin->comm);CHKERRQ(ierr);
  } else if (type == NORM_1_AND_2) {
    PetscReal temp[2];
    ierr = VecNorm_Seq(xin,NORM_1,temp);CHKERRQ(ierr);
    ierr = VecNorm_Seq(xin,NORM_2,temp+1);CHKERRQ(ierr);
    temp[1] = temp[1]*temp[1];
    ierr = MPI_Allreduce(temp,z,2,MPIU_REAL,MPI_SUM,xin->comm);CHKERRQ(ierr);
    z[1] = sqrt(z[1]);
  }
  PetscFunctionReturn(0);
}

/*
   These two functions are the MPI reduction operations used for max and min with index.
   The call below to MPI_Op_create() converts the functions Vec[Max,Min]_Local() to the
   MPI operators Vec[Max,Min]_Local_Op.  (A hedged sketch of that registration appears
   at the end of this file.)
*/
MPI_Op VecMax_Local_Op = 0;
MPI_Op VecMin_Local_Op = 0;

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "VecMax_Local"
void PETSCVEC_DLLEXPORT VecMax_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal*)in,*xout = (PetscReal*)out;

  PetscFunctionBegin;
  if (*datatype != MPIU_REAL) {
    (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  if (xin[0] > xout[0]) { /* [0] holds the value, [1] its global index */
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscFunctionReturnVoid(); /* cannot return a value */
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "VecMin_Local"
void PETSCVEC_DLLEXPORT VecMin_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal*)in,*xout = (PetscReal*)out;

  PetscFunctionBegin;
  if (*datatype != MPIU_REAL) {
    (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  if (xin[0] < xout[0]) { /* [0] holds the value, [1] its global index */
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscFunctionReturnVoid(); /* cannot return a value */
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "VecMax_MPI"
PetscErrorCode VecMax_MPI(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscErrorCode ierr;
  PetscReal      work;

  PetscFunctionBegin;
  /* Find the local max */
  ierr = VecMax_Seq(xin,idx,&work);CHKERRQ(ierr);

  /* Find the global max */
  if (!idx) {
    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MAX,xin->comm);CHKERRQ(ierr);
  } else {
    PetscReal work2[2],z2[2];
    PetscInt  rstart;

    /* pack the local max and its global index into a pair of reals for the user-defined reduction */
    ierr = VecGetOwnershipRange(xin,&rstart,PETSC_NULL);CHKERRQ(ierr);
    work2[0] = work;
    work2[1] = *idx + rstart;
    ierr = MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMax_Local_Op,xin->comm);CHKERRQ(ierr);
    *z   = z2[0];
    *idx = (PetscInt)z2[1];
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecMin_MPI"
PetscErrorCode VecMin_MPI(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscErrorCode ierr;
  PetscReal      work;

  PetscFunctionBegin;
  /* Find the local min */
  ierr = VecMin_Seq(xin,idx,&work);CHKERRQ(ierr);

  /* Find the global min */
  if (!idx) {
    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MIN,xin->comm);CHKERRQ(ierr);
  } else {
    PetscReal work2[2],z2[2];
    PetscInt  rstart;

    /* pack the local min and its global index into a pair of reals for the user-defined reduction */
    ierr = VecGetOwnershipRange(xin,&rstart,PETSC_NULL);CHKERRQ(ierr);
    work2[0] = work;
    work2[1] = *idx + rstart;
    ierr = MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMin_Local_Op,xin->comm);CHKERRQ(ierr);
    *z   = z2[0];
    *idx = (PetscInt)z2[1];
  }
  PetscFunctionReturn(0);
}
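/*
   Hedged sketch, not part of the original file: the "call below to MPI_Op_create()"
   referred to in the comment above Vec[Max,Min]_Local() is made elsewhere in the
   library.  The guarded block below only illustrates roughly what that registration
   looks like; the guard macro, the routine name, and the placement are assumptions,
   while MPI_Op_create() itself is standard MPI (user combine function, commutativity
   flag, resulting MPI_Op).
*/
#if defined(PETSC_SKETCH_VECMAXMIN_OP_REGISTRATION) /* hypothetical guard; never defined */
#undef __FUNCT__
#define __FUNCT__ "VecMaxMin_RegisterOps_Sketch"
static PetscErrorCode VecMaxMin_RegisterOps_Sketch(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* commute flag = 1: the max/min-with-index combine functions are treated as commutative */
  ierr = MPI_Op_create(VecMax_Local,1,&VecMax_Local_Op);CHKERRQ(ierr);
  ierr = MPI_Op_create(VecMin_Local,1,&VecMin_Local_Op);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
/*
   Once registered, each operator reduces pairs of PetscReal packed as [value, global index],
   exactly as done in VecMax_MPI() and VecMin_MPI() above.
*/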
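/*
   Hedged usage note, not part of the original file: Ethernet_Allreduce() above takes
   the same argument list as the MPI_Allreduce() calls in this file (its type and op
   arguments are accepted but ignored, and only MPIU_REAL summation is performed); the
   do_not_use_ethernet macro suggests it is not currently enabled.  In principle it
   could stand in for a real-valued sum reduction such as the one in VecNorm_MPI():

     ierr = Ethernet_Allreduce(&work,&sum,1,MPIU_REAL,MPI_SUM,xin->comm);CHKERRQ(ierr);
*/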