1 2 /* 3 This is included by sbaij.c to generate unsigned short and regular versions of these two functions 4 */ 5 #undef USHORT 6 #if defined(USESHORT) 7 #define USHORT _ushort 8 #else 9 #define USHORT 10 #endif 11 #define PETSCMAP1_a(a,b) a ## b 12 #define PETSCMAP1_b(a,b) PETSCMAP1_a(a,b) 13 #define PETSCMAP1(a) PETSCMAP1_b(a,USHORT) 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMult_SeqSBAIJ_1_Hermitian" 17 PetscErrorCode PETSCMAP1(MatMult_SeqSBAIJ_1_Hermitian)(Mat A,Vec xx,Vec zz) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 const PetscScalar *x; 21 PetscScalar *z,x1,sum; 22 const MatScalar *v; 23 MatScalar vj; 24 PetscErrorCode ierr; 25 PetscInt mbs=a->mbs,i,j,nz; 26 const PetscInt *ai=a->i; 27 #if defined(USESHORT) 28 const unsigned short *ib=a->jshort; 29 unsigned short ibt; 30 #else 31 const PetscInt *ib=a->j; 32 PetscInt ibt; 33 #endif 34 35 PetscFunctionBegin; 36 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 37 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 38 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 39 40 v = a->a; 41 for (i=0; i<mbs; i++) { 42 nz = ai[i+1] - ai[i]; /* length of i_th row of A */ 43 x1 = x[i]; 44 sum = v[0]*x1; /* diagonal term */ 45 for (j=1; j<nz; j++) { 46 ibt = ib[j]; /* crash here running './ex127 -n 1000' ??? */ 47 vj = v[j]; 48 sum += vj * x[ibt]; /* (strict upper triangular part of A)*x */ 49 50 vj = PetscConj(v[j]); 51 z[ibt] += vj * x1; /* (strict lower triangular part of A)*x */ 52 } 53 z[i] += sum; 54 v += nz; 55 ib += nz; 56 } 57 58 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 59 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 60 ierr = PetscLogFlops(2.0*(2.0*a->nz - mbs) - mbs);CHKERRQ(ierr); 61 PetscFunctionReturn(0); 62 } 63 64 #undef __FUNCT__ 65 #define __FUNCT__ "MatMult_SeqSBAIJ_1" 66 PetscErrorCode PETSCMAP1(MatMult_SeqSBAIJ_1)(Mat A,Vec xx,Vec zz) 67 { 68 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 69 const PetscScalar *x; 70 PetscScalar *z,x1,sum; 71 const MatScalar *v; 72 MatScalar vj; 73 PetscErrorCode ierr; 74 PetscInt mbs=a->mbs,i,j,nz; 75 const PetscInt *ai=a->i; 76 #if defined(USESHORT) 77 const unsigned short *ib=a->jshort; 78 unsigned short ibt; 79 #else 80 const PetscInt *ib=a->j; 81 PetscInt ibt; 82 #endif 83 84 PetscFunctionBegin; 85 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 86 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 87 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 88 89 v = a->a; 90 for (i=0; i<mbs; i++) { 91 nz = ai[i+1] - ai[i]; /* length of i_th row of A */ 92 x1 = x[i]; 93 sum = v[0]*x1; /* diagonal term */ 94 for (j=1; j<nz; j++) { 95 ibt = ib[j]; 96 vj = v[j]; 97 z[ibt] += vj * x1; /* (strict lower triangular part of A)*x */ 98 sum += vj * x[ibt]; /* (strict upper triangular part of A)*x */ 99 } 100 z[i] += sum; 101 v += nz; 102 ib += nz; 103 } 104 105 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 106 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 107 ierr = PetscLogFlops(2.0*(2.0*a->nz - mbs) - mbs);CHKERRQ(ierr); 108 PetscFunctionReturn(0); 109 } 110 111 #undef __FUNCT__ 112 #define __FUNCT__ "MatSOR_SeqSBAIJ" 113 PetscErrorCode PETSCMAP1(MatSOR_SeqSBAIJ)(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 114 { 115 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 116 const MatScalar *aa=a->a,*v,*v1,*aidiag; 117 PetscScalar *x,*t,sum; 118 const PetscScalar *b; 119 MatScalar tmp; 120 PetscErrorCode ierr; 121 PetscInt m=a->mbs,bs=A->rmap->bs,j; 122 const PetscInt *ai=a->i; 123 #if defined(USESHORT) 124 const unsigned short *aj=a->jshort,*vj,*vj1; 125 #else 126 const PetscInt *aj=a->j,*vj,*vj1; 127 #endif 128 PetscInt nz,nz1,i; 129 130 PetscFunctionBegin; 131 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 132 133 its = its*lits; 134 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 135 136 if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 137 138 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 139 if (xx != bb) { 140 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 141 } else { 142 b = x; 143 } 144 145 if (!a->idiagvalid) { 146 if (!a->idiag) { 147 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 148 } 149 for (i=0; i<a->mbs; i++) a->idiag[i] = 1.0/a->a[a->i[i]]; 150 a->idiagvalid = PETSC_TRUE; 151 } 152 153 if (!a->sor_work) { 154 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 155 } 156 t = a->sor_work; 157 158 aidiag = a->idiag; 159 160 if (flag == SOR_APPLY_UPPER) { 161 /* apply (U + D/omega) to the vector */ 162 PetscScalar d; 163 for (i=0; i<m; i++) { 164 d = fshift + aa[ai[i]]; 165 nz = ai[i+1] - ai[i] - 1; 166 vj = aj + ai[i] + 1; 167 v = aa + ai[i] + 1; 168 sum = b[i]*d/omega; 169 PetscSparseDensePlusDot(sum,b,v,vj,nz); 170 x[i] = sum; 171 } 172 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 173 } 174 175 if (flag & SOR_ZERO_INITIAL_GUESS) { 176 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 177 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 178 179 v = aa + 1; 180 vj = aj + 1; 181 for (i=0; i<m; i++){ 182 nz = ai[i+1] - ai[i] - 1; 183 tmp = - (x[i] = omega*t[i]*aidiag[i]); 184 for (j=0; j<nz; j++) { 185 t[vj[j]] += tmp*v[j]; 186 } 187 v += nz + 1; 188 vj += nz + 1; 189 } 190 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 191 } 192 193 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 194 int nz2; 195 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 196 #if defined(PETSC_USE_BACKWARD_LOOP) 197 v = aa + ai[m] - 1; 198 vj = aj + ai[m] - 1; 199 for (i=m-1; i>=0; i--){ 200 sum = b[i]; 201 nz = ai[i+1] - ai[i] - 1; 202 {PetscInt __i;for(__i=0;__i<nz;__i++) sum -= v[-__i] * x[vj[-__i]];} 203 #else 204 v = aa + ai[m-1] + 1; 205 vj = aj + ai[m-1] + 1; 206 nz = 0; 207 for (i=m-1; i>=0; i--){ 208 sum = b[i]; 209 nz2 = ai[i] - ai[i-1] - 1; 210 PETSC_Prefetch(v-nz2-1,0,1); 211 PETSC_Prefetch(vj-nz2-1,0,1); 212 PetscSparseDenseMinusDot(sum,x,v,vj,nz); 213 nz = nz2; 214 #endif 215 x[i] = omega*sum*aidiag[i]; 216 v -= nz + 1; 217 vj -= nz + 1; 218 } 219 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 220 } else { 221 v = aa + ai[m-1] + 1; 222 vj = aj + ai[m-1] + 1; 223 nz = 0; 224 for (i=m-1; i>=0; i--){ 225 sum = t[i]; 226 nz2 = ai[i] - ai[i-1] - 1; 227 PETSC_Prefetch(v-nz2-1,0,1); 228 PETSC_Prefetch(vj-nz2-1,0,1); 229 PetscSparseDenseMinusDot(sum,x,v,vj,nz); 230 x[i] = (1-omega)*x[i] + omega*sum*aidiag[i]; 231 nz = nz2; 232 v -= nz + 1; 233 vj -= nz + 1; 234 } 235 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 236 } 237 } 238 its--; 239 } 240 241 while (its--) { 242 /* 243 forward sweep: 244 for i=0,...,m-1: 245 sum[i] = (b[i] - U(i,:)x )/d[i]; 246 x[i] = (1-omega)x[i] + omega*sum[i]; 247 b = b - x[i]*U^T(i,:); 248 249 */ 250 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 251 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 252 253 for (i=0; i<m; i++){ 254 v = aa + ai[i] + 1; v1=v; 255 vj = aj + ai[i] + 1; vj1=vj; 256 nz = ai[i+1] - ai[i] - 1; nz1=nz; 257 sum = t[i]; 258 ierr = PetscLogFlops(4.0*nz-2);CHKERRQ(ierr); 259 while (nz1--) sum -= (*v1++)*x[*vj1++]; 260 x[i] = (1-omega)*x[i] + omega*sum*aidiag[i]; 261 while (nz--) t[*vj++] -= x[i]*(*v++); 262 } 263 } 264 265 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 266 /* 267 backward sweep: 268 b = b - x[i]*U^T(i,:), i=0,...,n-2 269 for i=m-1,...,0: 270 sum[i] = (b[i] - U(i,:)x )/d[i]; 271 x[i] = (1-omega)x[i] + omega*sum[i]; 272 */ 273 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 274 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 275 276 for (i=0; i<m-1; i++){ /* update rhs */ 277 v = aa + ai[i] + 1; 278 vj = aj + ai[i] + 1; 279 nz = ai[i+1] - ai[i] - 1; 280 ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr); 281 while (nz--) t[*vj++] -= x[i]*(*v++); 282 } 283 for (i=m-1; i>=0; i--){ 284 v = aa + ai[i] + 1; 285 vj = aj + ai[i] + 1; 286 nz = ai[i+1] - ai[i] - 1; 287 ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr); 288 sum = t[i]; 289 while (nz--) sum -= x[*vj++]*(*v++); 290 x[i] = (1-omega)*x[i] + omega*sum*aidiag[i]; 291 } 292 } 293 } 294 295 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 296 if (bb != xx) { 297 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 298 } 299 PetscFunctionReturn(0); 300 } 301