#define PETSCVEC_DLL
/*
   Code for some of the parallel vector primitives.
*/
#include "src/vec/vec/impls/mpi/pvecimpl.h"
#include "src/inline/dot.h"
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "VecMDot_MPI"
PetscErrorCode VecMDot_MPI(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
{
  PetscScalar    awork[128],*work = awork;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (nv > 128) {
    ierr = PetscMalloc(nv*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  }
  ierr = VecMDot_Seq(xin,nv,y,work);CHKERRQ(ierr);
  ierr = MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,((PetscObject)xin)->comm);CHKERRQ(ierr);
  if (nv > 128) {
    ierr = PetscFree(work);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecMTDot_MPI"
PetscErrorCode VecMTDot_MPI(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
{
  PetscScalar    awork[128],*work = awork;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (nv > 128) {
    ierr = PetscMalloc(nv*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  }
  ierr = VecMTDot_Seq(xin,nv,y,work);CHKERRQ(ierr);
  ierr = MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,((PetscObject)xin)->comm);CHKERRQ(ierr);
  if (nv > 128) {
    ierr = PetscFree(work);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecNorm_MPI"
PetscErrorCode VecNorm_MPI(Vec xin,NormType type,PetscReal *z)
{
  Vec_MPI        *x = (Vec_MPI*)xin->data;
  PetscReal      sum,work = 0.0;
  PetscScalar    *xx = x->array;
  PetscErrorCode ierr;
  PetscInt       n = xin->map->n;

  PetscFunctionBegin;
  if (type == NORM_2 || type == NORM_FROBENIUS) {

#if defined(PETSC_HAVE_SLOW_BLAS_NORM2)
#if defined(PETSC_USE_FORTRAN_KERNEL_NORM)
    fortrannormsqr_(xx,&n,&work);
#elif defined(PETSC_USE_UNROLLED_NORM)
    /* handle the leftover n % 4 entries; the cases fall through intentionally */
    switch (n & 0x3) {
      case 3: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 2: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 1: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++; n -= 4;
    }
    while (n>0) {
      work += PetscRealPart(xx[0]*PetscConj(xx[0])+xx[1]*PetscConj(xx[1])+
                            xx[2]*PetscConj(xx[2])+xx[3]*PetscConj(xx[3]));
      xx += 4; n -= 4;
    }
#else
    {PetscInt i; for (i=0; i<n; i++) work += PetscRealPart((xx[i])*(PetscConj(xx[i])));}
#endif
#else
    {
      PetscBLASInt one = 1,bn = PetscBLASIntCast(n);
      work  = BLASnrm2_(&bn,xx,&one);
      work *= work;
    }
#endif
    ierr = MPI_Allreduce(&work,&sum,1,MPIU_REAL,MPI_SUM,((PetscObject)xin)->comm);CHKERRQ(ierr);
    *z   = sqrt(sum);
    ierr = PetscLogFlops(2*xin->map->n);CHKERRQ(ierr);
  } else if (type == NORM_1) {
    /* Find the local part */
    ierr = VecNorm_Seq(xin,NORM_1,&work);CHKERRQ(ierr);
    /* Find the global sum */
    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_SUM,((PetscObject)xin)->comm);CHKERRQ(ierr);
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    ierr = VecNorm_Seq(xin,NORM_INFINITY,&work);CHKERRQ(ierr);
    /* Find the global max */
    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MAX,((PetscObject)xin)->comm);CHKERRQ(ierr);
  } else if (type == NORM_1_AND_2) {
    PetscReal temp[2];
    ierr = VecNorm_Seq(xin,NORM_1,temp);CHKERRQ(ierr);
    ierr = VecNorm_Seq(xin,NORM_2,temp+1);CHKERRQ(ierr);
    temp[1] = temp[1]*temp[1];
    ierr = MPI_Allreduce(temp,z,2,MPIU_REAL,MPI_SUM,((PetscObject)xin)->comm);CHKERRQ(ierr);
    z[1] = sqrt(z[1]);
  }
  PetscFunctionReturn(0);
}

/*
   These two functions are the MPI reduction operations used for max and min with index.
   The call below to MPI_Op_create() converts the functions Vec[Max,Min]_Local() to the
   MPI operators Vec[Max,Min]_Local_Op.
*/
MPI_Op VecMax_Local_Op = 0;
MPI_Op VecMin_Local_Op = 0;

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "VecMax_Local"
void PETSCVEC_DLLEXPORT MPIAPI VecMax_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal*)in,*xout = (PetscReal*)out;

  PetscFunctionBegin;
  if (*datatype != MPIU_REAL) {
    (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  if (xin[0] > xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscFunctionReturnVoid(); /* cannot return a value */
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "VecMin_Local"
void PETSCVEC_DLLEXPORT MPIAPI VecMin_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal*)in,*xout = (PetscReal*)out;

  PetscFunctionBegin;
  if (*datatype != MPIU_REAL) {
    (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  if (xin[0] < xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "VecMax_MPI"
PetscErrorCode VecMax_MPI(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscErrorCode ierr;
  PetscReal      work;

  PetscFunctionBegin;
  /* Find the local max */
  ierr = VecMax_Seq(xin,idx,&work);CHKERRQ(ierr);

  /* Find the global max */
  if (!idx) {
    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MAX,((PetscObject)xin)->comm);CHKERRQ(ierr);
  } else {
    PetscReal work2[2],z2[2];
    PetscInt  rstart;

    rstart   = xin->map->rstart;
    work2[0] = work;
    work2[1] = *idx + rstart;
    ierr = MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMax_Local_Op,((PetscObject)xin)->comm);CHKERRQ(ierr);
    *z   = z2[0];
    *idx = (PetscInt)z2[1];
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecMin_MPI"
PetscErrorCode VecMin_MPI(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscErrorCode ierr;
  PetscReal      work;

  PetscFunctionBegin;
  /* Find the local min */
  ierr = VecMin_Seq(xin,idx,&work);CHKERRQ(ierr);

  /* Find the global min */
  if (!idx) {
    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MIN,((PetscObject)xin)->comm);CHKERRQ(ierr);
  } else {
    PetscReal work2[2],z2[2];
    PetscInt  rstart;

    ierr = VecGetOwnershipRange(xin,&rstart,PETSC_NULL);CHKERRQ(ierr);
    work2[0] = work;
    work2[1] = *idx + rstart;
    ierr = MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMin_Local_Op,((PetscObject)xin)->comm);CHKERRQ(ierr);
    *z   = z2[0];
    *idx = (PetscInt)z2[1];
  }
  PetscFunctionReturn(0);
}
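
/*
   Editorial note (illustrative sketch, not part of the original file): the comment above
   refers to MPI_Op_create() turning Vec[Max,Min]_Local() into the operators
   Vec[Max,Min]_Local_Op; that registration happens elsewhere in the library, during
   vector package initialization. The block below is a minimal sketch of such a
   registration. The function name VecCreateLocalOps_Sketch, the never-defined guard
   macro PETSC_ILLUSTRATIVE_OP_REGISTRATION_SKETCH, and the commute flag value 1 are
   assumptions for illustration only; depending on the compiler, a cast of the function
   pointers to MPI_User_function* may also be needed.
*/
#if defined(PETSC_ILLUSTRATIVE_OP_REGISTRATION_SKETCH) /* hypothetical guard; never defined, so this sketch is not compiled */
#undef __FUNCT__
#define __FUNCT__ "VecCreateLocalOps_Sketch"
static PetscErrorCode VecCreateLocalOps_Sketch(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* commute = 1: the (value,index) max/min reductions are treated as commutative */
  ierr = MPI_Op_create(VecMax_Local,1,&VecMax_Local_Op);CHKERRQ(ierr);
  ierr = MPI_Op_create(VecMin_Local,1,&VecMin_Local_Op);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif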