1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for SBAIJ format. 5 */ 6 7 #include "src/mat/impls/sbaij/seq/sbaij.h" 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/inline/ilu.h" 10 #include "src/inline/dot.h" 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatSolve_SeqSBAIJ_N" 14 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 15 { 16 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 17 IS isrow=a->row; 18 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 19 PetscErrorCode ierr; 20 PetscInt nz,*vj,k,*r,idx,k1; 21 PetscInt bs=A->rmap.bs,bs2 = a->bs2; 22 MatScalar *aa=a->a,*v,*diag; 23 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 24 25 PetscFunctionBegin; 26 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 27 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 28 t = a->solve_work; 29 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 30 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 31 32 /* solve U^T * D * y = b by forward substitution */ 33 xk = t; 34 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 35 idx = bs*r[k]; 36 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 37 } 38 for (k=0; k<mbs; k++){ 39 v = aa + bs2*ai[k]; 40 xk = t + k*bs; /* Dk*xk = k-th block of x */ 41 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 42 nz = ai[k+1] - ai[k]; 43 vj = aj + ai[k]; 44 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 45 while (nz--) { 46 /* x(:) += U(k,:)^T*(Dk*xk) */ 47 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 48 vj++; xj = t + (*vj)*bs; 49 v += bs2; 50 } 51 /* xk = inv(Dk)*(Dk*xk) */ 52 diag = aa+k*bs2; /* ptr to inv(Dk) */ 53 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 54 } 55 56 /* solve U*x = y by back substitution */ 57 for (k=mbs-1; k>=0; k--){ 58 v = aa + bs2*ai[k]; 59 xk = t + k*bs; /* xk */ 60 nz = ai[k+1] - ai[k]; 61 vj = aj + ai[k]; 62 xj = t + (*vj)*bs; 63 while (nz--) { 64 /* xk += U(k,:)*x(:) */ 65 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 66 vj++; 67 v += bs2; xj = t + (*vj)*bs; 68 } 69 idx = bs*r[k]; 70 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 71 } 72 73 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 74 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 75 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 76 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 77 ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 83 PetscErrorCode ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 84 { 85 PetscErrorCode ierr; 86 PetscInt nz,*vj,k; 87 PetscInt bs2 = bs*bs; 88 MatScalar *v,*diag; 89 PetscScalar *xk,*xj,*xk_tmp; 90 91 PetscFunctionBegin; 92 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 93 for (k=0; k<mbs; k++){ 94 v = aa + bs2*ai[k]; 95 xk = x + k*bs; /* Dk*xk = k-th block of x */ 96 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 97 nz = ai[k+1] - ai[k]; 98 vj = aj + ai[k]; 99 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 100 while (nz--) { 101 /* x(:) += U(k,:)^T*(Dk*xk) */ 102 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 103 vj++; xj = x + (*vj)*bs; 104 v += bs2; 105 } 106 /* xk = inv(Dk)*(Dk*xk) */ 107 diag = aa+k*bs2; /* ptr to inv(Dk) */ 108 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 109 } 110 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 111 PetscFunctionReturn(0); 112 } 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 116 PetscErrorCode BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 117 { 118 PetscInt nz,*vj,k; 119 PetscInt bs2 = bs*bs; 120 MatScalar *v; 121 PetscScalar *xk,*xj; 122 123 PetscFunctionBegin; 124 for (k=mbs-1; k>=0; k--){ 125 v = aa + bs2*ai[k]; 126 xk = x + k*bs; /* xk */ 127 nz = ai[k+1] - ai[k]; 128 vj = aj + ai[k]; 129 xj = x + (*vj)*bs; 130 while (nz--) { 131 /* xk += U(k,:)*x(:) */ 132 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 133 vj++; 134 v += bs2; xj = x + (*vj)*bs; 135 } 136 } 137 PetscFunctionReturn(0); 138 } 139 140 #undef __FUNCT__ 141 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering" 142 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 143 { 144 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 145 PetscErrorCode ierr; 146 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 147 PetscInt bs=A->rmap.bs,bs2 = a->bs2; 148 MatScalar *aa=a->a; 149 PetscScalar *x,*b; 150 151 PetscFunctionBegin; 152 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 153 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 154 155 /* solve U^T * D * y = b by forward substitution */ 156 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 157 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 158 159 /* solve U*x = y by back substitution */ 160 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 161 162 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 163 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 164 ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr); 165 PetscFunctionReturn(0); 166 } 167 168 #undef __FUNCT__ 169 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering" 170 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 171 { 172 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 173 PetscErrorCode ierr; 174 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 175 PetscInt bs=A->rmap.bs,bs2 = a->bs2; 176 MatScalar *aa=a->a; 177 PetscScalar *x,*b; 178 179 PetscFunctionBegin; 180 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 181 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 182 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 183 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 184 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 185 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 186 ierr = PetscLogFlops(bs2*a->nz + A->rmap.N);CHKERRQ(ierr); 187 PetscFunctionReturn(0); 188 } 189 190 #undef __FUNCT__ 191 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering" 192 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 193 { 194 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 195 PetscErrorCode ierr; 196 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 197 PetscInt bs=A->rmap.bs,bs2 = a->bs2; 198 MatScalar *aa=a->a; 199 PetscScalar *x,*b; 200 201 PetscFunctionBegin; 202 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 203 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 204 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 205 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 206 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 207 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 208 ierr = PetscLogFlops(bs2*a->nz);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatSolve_SeqSBAIJ_7" 214 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 215 { 216 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 217 IS isrow=a->row; 218 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 219 PetscErrorCode ierr; 220 PetscInt nz,*vj,k,*r,idx; 221 MatScalar *aa=a->a,*v,*d; 222 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 223 224 PetscFunctionBegin; 225 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 226 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 227 t = a->solve_work; 228 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 229 230 /* solve U^T * D * y = b by forward substitution */ 231 tp = t; 232 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 233 idx = 7*r[k]; 234 tp[0] = b[idx]; 235 tp[1] = b[idx+1]; 236 tp[2] = b[idx+2]; 237 tp[3] = b[idx+3]; 238 tp[4] = b[idx+4]; 239 tp[5] = b[idx+5]; 240 tp[6] = b[idx+6]; 241 tp += 7; 242 } 243 244 for (k=0; k<mbs; k++){ 245 v = aa + 49*ai[k]; 246 vj = aj + ai[k]; 247 tp = t + k*7; 248 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 249 nz = ai[k+1] - ai[k]; 250 tp = t + (*vj)*7; 251 while (nz--) { 252 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 253 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 254 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 255 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 256 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 257 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 258 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 259 vj++; tp = t + (*vj)*7; 260 v += 49; 261 } 262 263 /* xk = inv(Dk)*(Dk*xk) */ 264 d = aa+k*49; /* ptr to inv(Dk) */ 265 tp = t + k*7; 266 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 267 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 268 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 269 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 270 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 271 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 272 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 273 } 274 275 /* solve U*x = y by back substitution */ 276 for (k=mbs-1; k>=0; k--){ 277 v = aa + 49*ai[k]; 278 vj = aj + ai[k]; 279 tp = t + k*7; 280 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 281 nz = ai[k+1] - ai[k]; 282 283 tp = t + (*vj)*7; 284 while (nz--) { 285 /* xk += U(k,:)*x(:) */ 286 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 287 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 288 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 289 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 290 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 291 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 292 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 293 vj++; tp = t + (*vj)*7; 294 v += 49; 295 } 296 tp = t + k*7; 297 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 298 idx = 7*r[k]; 299 x[idx] = x0; 300 x[idx+1] = x1; 301 x[idx+2] = x2; 302 x[idx+3] = x3; 303 x[idx+4] = x4; 304 x[idx+5] = x5; 305 x[idx+6] = x6; 306 } 307 308 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 309 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 310 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 311 ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr); 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 317 PetscErrorCode ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 318 { 319 MatScalar *v,*d; 320 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 321 PetscInt nz,*vj,k; 322 323 PetscFunctionBegin; 324 for (k=0; k<mbs; k++){ 325 v = aa + 49*ai[k]; 326 xp = x + k*7; 327 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 328 nz = ai[k+1] - ai[k]; 329 vj = aj + ai[k]; 330 xp = x + (*vj)*7; 331 while (nz--) { 332 /* x(:) += U(k,:)^T*(Dk*xk) */ 333 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 334 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 335 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 336 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 337 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 338 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 339 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 340 vj++; xp = x + (*vj)*7; 341 v += 49; 342 } 343 /* xk = inv(Dk)*(Dk*xk) */ 344 d = aa+k*49; /* ptr to inv(Dk) */ 345 xp = x + k*7; 346 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 347 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 348 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 349 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 350 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 351 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 352 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 353 } 354 PetscFunctionReturn(0); 355 } 356 357 #undef __FUNCT__ 358 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 359 PetscErrorCode BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 360 { 361 MatScalar *v; 362 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 363 PetscInt nz,*vj,k; 364 365 PetscFunctionBegin; 366 for (k=mbs-1; k>=0; k--){ 367 v = aa + 49*ai[k]; 368 xp = x + k*7; 369 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 370 nz = ai[k+1] - ai[k]; 371 vj = aj + ai[k]; 372 xp = x + (*vj)*7; 373 while (nz--) { 374 /* xk += U(k,:)*x(:) */ 375 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 376 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 377 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 378 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 379 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 380 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 381 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 382 vj++; 383 v += 49; xp = x + (*vj)*7; 384 } 385 xp = x + k*7; 386 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 387 } 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering" 393 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 394 { 395 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 396 PetscErrorCode ierr; 397 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 398 MatScalar *aa=a->a; 399 PetscScalar *x,*b; 400 401 PetscFunctionBegin; 402 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 403 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 404 405 /* solve U^T * D * y = b by forward substitution */ 406 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 407 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 408 409 /* solve U*x = y by back substitution */ 410 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 411 412 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 413 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 414 ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr); 415 PetscFunctionReturn(0); 416 } 417 418 #undef __FUNCT__ 419 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering" 420 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 421 { 422 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 423 PetscErrorCode ierr; 424 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 425 MatScalar *aa=a->a; 426 PetscScalar *x,*b; 427 428 PetscFunctionBegin; 429 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 430 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 431 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 432 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 433 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 434 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 435 ierr = PetscLogFlops(49*a->nz + mbs);CHKERRQ(ierr); 436 PetscFunctionReturn(0); 437 } 438 439 #undef __FUNCT__ 440 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering" 441 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 442 { 443 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 444 PetscErrorCode ierr; 445 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 446 MatScalar *aa=a->a; 447 PetscScalar *x,*b; 448 449 PetscFunctionBegin; 450 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 451 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 452 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 453 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 454 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 455 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 456 ierr = PetscLogFlops(49*a->nz);CHKERRQ(ierr); 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatSolve_SeqSBAIJ_6" 462 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 463 { 464 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 465 IS isrow=a->row; 466 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 467 PetscErrorCode ierr; 468 PetscInt nz,*vj,k,*r,idx; 469 MatScalar *aa=a->a,*v,*d; 470 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 471 472 PetscFunctionBegin; 473 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 474 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 475 t = a->solve_work; 476 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 477 478 /* solve U^T * D * y = b by forward substitution */ 479 tp = t; 480 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 481 idx = 6*r[k]; 482 tp[0] = b[idx]; 483 tp[1] = b[idx+1]; 484 tp[2] = b[idx+2]; 485 tp[3] = b[idx+3]; 486 tp[4] = b[idx+4]; 487 tp[5] = b[idx+5]; 488 tp += 6; 489 } 490 491 for (k=0; k<mbs; k++){ 492 v = aa + 36*ai[k]; 493 vj = aj + ai[k]; 494 tp = t + k*6; 495 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 496 nz = ai[k+1] - ai[k]; 497 tp = t + (*vj)*6; 498 while (nz--) { 499 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 500 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 501 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 502 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 503 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 504 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 505 vj++; tp = t + (*vj)*6; 506 v += 36; 507 } 508 509 /* xk = inv(Dk)*(Dk*xk) */ 510 d = aa+k*36; /* ptr to inv(Dk) */ 511 tp = t + k*6; 512 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 513 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 514 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 515 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 516 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 517 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 518 } 519 520 /* solve U*x = y by back substitution */ 521 for (k=mbs-1; k>=0; k--){ 522 v = aa + 36*ai[k]; 523 vj = aj + ai[k]; 524 tp = t + k*6; 525 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 526 nz = ai[k+1] - ai[k]; 527 528 tp = t + (*vj)*6; 529 while (nz--) { 530 /* xk += U(k,:)*x(:) */ 531 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 532 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 533 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 534 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 535 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 536 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 537 vj++; tp = t + (*vj)*6; 538 v += 36; 539 } 540 tp = t + k*6; 541 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 542 idx = 6*r[k]; 543 x[idx] = x0; 544 x[idx+1] = x1; 545 x[idx+2] = x2; 546 x[idx+3] = x3; 547 x[idx+4] = x4; 548 x[idx+5] = x5; 549 } 550 551 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 552 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 553 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 554 ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr); 555 PetscFunctionReturn(0); 556 } 557 558 #undef __FUNCT__ 559 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 560 PetscErrorCode ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 561 { 562 MatScalar *v,*d; 563 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 564 PetscInt nz,*vj,k; 565 566 PetscFunctionBegin; 567 for (k=0; k<mbs; k++){ 568 v = aa + 36*ai[k]; 569 xp = x + k*6; 570 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 571 nz = ai[k+1] - ai[k]; 572 vj = aj + ai[k]; 573 xp = x + (*vj)*6; 574 while (nz--) { 575 /* x(:) += U(k,:)^T*(Dk*xk) */ 576 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 577 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 578 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 579 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 580 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 581 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 582 vj++; xp = x + (*vj)*6; 583 v += 36; 584 } 585 /* xk = inv(Dk)*(Dk*xk) */ 586 d = aa+k*36; /* ptr to inv(Dk) */ 587 xp = x + k*6; 588 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 589 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 590 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 591 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 592 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 593 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 594 } 595 PetscFunctionReturn(0); 596 } 597 #undef __FUNCT__ 598 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 599 PetscErrorCode BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 600 { 601 MatScalar *v; 602 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 603 PetscInt nz,*vj,k; 604 605 PetscFunctionBegin; 606 for (k=mbs-1; k>=0; k--){ 607 v = aa + 36*ai[k]; 608 xp = x + k*6; 609 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 610 nz = ai[k+1] - ai[k]; 611 vj = aj + ai[k]; 612 xp = x + (*vj)*6; 613 while (nz--) { 614 /* xk += U(k,:)*x(:) */ 615 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 616 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 617 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 618 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 619 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 620 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 621 vj++; 622 v += 36; xp = x + (*vj)*6; 623 } 624 xp = x + k*6; 625 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 626 } 627 PetscFunctionReturn(0); 628 } 629 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering" 633 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 634 { 635 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 636 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 637 MatScalar *aa=a->a; 638 PetscScalar *x,*b; 639 PetscErrorCode ierr; 640 641 PetscFunctionBegin; 642 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 643 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 644 645 /* solve U^T * D * y = b by forward substitution */ 646 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 647 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 648 649 /* solve U*x = y by back substitution */ 650 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 651 652 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 653 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 654 ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr); 655 PetscFunctionReturn(0); 656 } 657 658 #undef __FUNCT__ 659 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering" 660 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 661 { 662 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 663 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 664 MatScalar *aa=a->a; 665 PetscScalar *x,*b; 666 PetscErrorCode ierr; 667 668 PetscFunctionBegin; 669 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 670 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 671 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 672 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 673 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 674 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 675 ierr = PetscLogFlops(36*a->nz + mbs);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 } 678 679 #undef __FUNCT__ 680 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering" 681 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 682 { 683 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 684 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 685 MatScalar *aa=a->a; 686 PetscScalar *x,*b; 687 PetscErrorCode ierr; 688 689 PetscFunctionBegin; 690 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 691 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 692 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 693 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 694 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 695 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 696 ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr); 697 PetscFunctionReturn(0); 698 } 699 700 #undef __FUNCT__ 701 #define __FUNCT__ "MatSolve_SeqSBAIJ_5" 702 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 703 { 704 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 705 IS isrow=a->row; 706 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 707 PetscErrorCode ierr; 708 PetscInt nz,*vj,k,*r,idx; 709 MatScalar *aa=a->a,*v,*diag; 710 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 711 712 PetscFunctionBegin; 713 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 714 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 715 t = a->solve_work; 716 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 717 718 /* solve U^T * D * y = b by forward substitution */ 719 tp = t; 720 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 721 idx = 5*r[k]; 722 tp[0] = b[idx]; 723 tp[1] = b[idx+1]; 724 tp[2] = b[idx+2]; 725 tp[3] = b[idx+3]; 726 tp[4] = b[idx+4]; 727 tp += 5; 728 } 729 730 for (k=0; k<mbs; k++){ 731 v = aa + 25*ai[k]; 732 vj = aj + ai[k]; 733 tp = t + k*5; 734 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 735 nz = ai[k+1] - ai[k]; 736 737 tp = t + (*vj)*5; 738 while (nz--) { 739 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 740 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 741 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 742 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 743 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 744 vj++; tp = t + (*vj)*5; 745 v += 25; 746 } 747 748 /* xk = inv(Dk)*(Dk*xk) */ 749 diag = aa+k*25; /* ptr to inv(Dk) */ 750 tp = t + k*5; 751 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 752 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 753 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 754 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 755 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 756 } 757 758 /* solve U*x = y by back substitution */ 759 for (k=mbs-1; k>=0; k--){ 760 v = aa + 25*ai[k]; 761 vj = aj + ai[k]; 762 tp = t + k*5; 763 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 764 nz = ai[k+1] - ai[k]; 765 766 tp = t + (*vj)*5; 767 while (nz--) { 768 /* xk += U(k,:)*x(:) */ 769 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 770 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 771 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 772 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 773 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 774 vj++; tp = t + (*vj)*5; 775 v += 25; 776 } 777 tp = t + k*5; 778 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 779 idx = 5*r[k]; 780 x[idx] = x0; 781 x[idx+1] = x1; 782 x[idx+2] = x2; 783 x[idx+3] = x3; 784 x[idx+4] = x4; 785 } 786 787 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 788 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 789 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 790 ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr); 791 PetscFunctionReturn(0); 792 } 793 794 #undef __FUNCT__ 795 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 796 PetscErrorCode ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 797 { 798 MatScalar *v,*diag; 799 PetscScalar *xp,x0,x1,x2,x3,x4; 800 PetscInt nz,*vj,k; 801 802 PetscFunctionBegin; 803 for (k=0; k<mbs; k++){ 804 v = aa + 25*ai[k]; 805 xp = x + k*5; 806 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 807 nz = ai[k+1] - ai[k]; 808 vj = aj + ai[k]; 809 xp = x + (*vj)*5; 810 while (nz--) { 811 /* x(:) += U(k,:)^T*(Dk*xk) */ 812 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 813 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 814 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 815 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 816 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 817 vj++; xp = x + (*vj)*5; 818 v += 25; 819 } 820 /* xk = inv(Dk)*(Dk*xk) */ 821 diag = aa+k*25; /* ptr to inv(Dk) */ 822 xp = x + k*5; 823 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 824 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 825 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 826 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 827 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 828 } 829 PetscFunctionReturn(0); 830 } 831 832 #undef __FUNCT__ 833 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 834 PetscErrorCode BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 835 { 836 MatScalar *v; 837 PetscScalar *xp,x0,x1,x2,x3,x4; 838 PetscInt nz,*vj,k; 839 840 PetscFunctionBegin; 841 for (k=mbs-1; k>=0; k--){ 842 v = aa + 25*ai[k]; 843 xp = x + k*5; 844 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 845 nz = ai[k+1] - ai[k]; 846 vj = aj + ai[k]; 847 xp = x + (*vj)*5; 848 while (nz--) { 849 /* xk += U(k,:)*x(:) */ 850 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 851 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 852 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 853 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 854 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 855 vj++; 856 v += 25; xp = x + (*vj)*5; 857 } 858 xp = x + k*5; 859 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 860 } 861 PetscFunctionReturn(0); 862 } 863 864 #undef __FUNCT__ 865 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering" 866 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 867 { 868 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 869 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 870 MatScalar *aa=a->a; 871 PetscScalar *x,*b; 872 PetscErrorCode ierr; 873 874 PetscFunctionBegin; 875 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 876 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 877 878 /* solve U^T * D * y = b by forward substitution */ 879 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 880 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 881 882 /* solve U*x = y by back substitution */ 883 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 884 885 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 886 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 887 ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr); 888 PetscFunctionReturn(0); 889 } 890 891 #undef __FUNCT__ 892 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering" 893 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 894 { 895 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 896 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 897 MatScalar *aa=a->a; 898 PetscScalar *x,*b; 899 PetscErrorCode ierr; 900 901 PetscFunctionBegin; 902 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 903 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 904 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 905 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 906 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 907 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 908 ierr = PetscLogFlops(25*(a->nz + mbs));CHKERRQ(ierr); 909 PetscFunctionReturn(0); 910 } 911 912 #undef __FUNCT__ 913 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering" 914 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 915 { 916 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 917 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 918 MatScalar *aa=a->a; 919 PetscScalar *x,*b; 920 PetscErrorCode ierr; 921 922 PetscFunctionBegin; 923 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 924 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 925 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 926 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 927 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 928 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 929 ierr = PetscLogFlops(25*a->nz);CHKERRQ(ierr); 930 PetscFunctionReturn(0); 931 } 932 933 #undef __FUNCT__ 934 #define __FUNCT__ "MatSolve_SeqSBAIJ_4" 935 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 936 { 937 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 938 IS isrow=a->row; 939 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 940 PetscErrorCode ierr; 941 PetscInt nz,*vj,k,*r,idx; 942 MatScalar *aa=a->a,*v,*diag; 943 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 944 945 PetscFunctionBegin; 946 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 947 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 948 t = a->solve_work; 949 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 950 951 /* solve U^T * D * y = b by forward substitution */ 952 tp = t; 953 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 954 idx = 4*r[k]; 955 tp[0] = b[idx]; 956 tp[1] = b[idx+1]; 957 tp[2] = b[idx+2]; 958 tp[3] = b[idx+3]; 959 tp += 4; 960 } 961 962 for (k=0; k<mbs; k++){ 963 v = aa + 16*ai[k]; 964 vj = aj + ai[k]; 965 tp = t + k*4; 966 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 967 nz = ai[k+1] - ai[k]; 968 969 tp = t + (*vj)*4; 970 while (nz--) { 971 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 972 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 973 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 974 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 975 vj++; tp = t + (*vj)*4; 976 v += 16; 977 } 978 979 /* xk = inv(Dk)*(Dk*xk) */ 980 diag = aa+k*16; /* ptr to inv(Dk) */ 981 tp = t + k*4; 982 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 983 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 984 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 985 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 986 } 987 988 /* solve U*x = y by back substitution */ 989 for (k=mbs-1; k>=0; k--){ 990 v = aa + 16*ai[k]; 991 vj = aj + ai[k]; 992 tp = t + k*4; 993 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 994 nz = ai[k+1] - ai[k]; 995 996 tp = t + (*vj)*4; 997 while (nz--) { 998 /* xk += U(k,:)*x(:) */ 999 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 1000 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 1001 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 1002 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 1003 vj++; tp = t + (*vj)*4; 1004 v += 16; 1005 } 1006 tp = t + k*4; 1007 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 1008 idx = 4*r[k]; 1009 x[idx] = x0; 1010 x[idx+1] = x1; 1011 x[idx+2] = x2; 1012 x[idx+3] = x3; 1013 } 1014 1015 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1016 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1017 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1018 ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr); 1019 PetscFunctionReturn(0); 1020 } 1021 1022 #undef __FUNCT__ 1023 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1024 PetscErrorCode ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1025 { 1026 MatScalar *v,*diag; 1027 PetscScalar *xp,x0,x1,x2,x3; 1028 PetscInt nz,*vj,k; 1029 1030 PetscFunctionBegin; 1031 for (k=0; k<mbs; k++){ 1032 v = aa + 16*ai[k]; 1033 xp = x + k*4; 1034 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 1035 nz = ai[k+1] - ai[k]; 1036 vj = aj + ai[k]; 1037 xp = x + (*vj)*4; 1038 while (nz--) { 1039 /* x(:) += U(k,:)^T*(Dk*xk) */ 1040 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 1041 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1042 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1043 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1044 vj++; xp = x + (*vj)*4; 1045 v += 16; 1046 } 1047 /* xk = inv(Dk)*(Dk*xk) */ 1048 diag = aa+k*16; /* ptr to inv(Dk) */ 1049 xp = x + k*4; 1050 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1051 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1052 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1053 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1054 } 1055 PetscFunctionReturn(0); 1056 } 1057 1058 #undef __FUNCT__ 1059 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1060 PetscErrorCode BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1061 { 1062 MatScalar *v; 1063 PetscScalar *xp,x0,x1,x2,x3; 1064 PetscInt nz,*vj,k; 1065 1066 PetscFunctionBegin; 1067 for (k=mbs-1; k>=0; k--){ 1068 v = aa + 16*ai[k]; 1069 xp = x + k*4; 1070 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 1071 nz = ai[k+1] - ai[k]; 1072 vj = aj + ai[k]; 1073 xp = x + (*vj)*4; 1074 while (nz--) { 1075 /* xk += U(k,:)*x(:) */ 1076 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 1077 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 1078 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 1079 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 1080 vj++; 1081 v += 16; xp = x + (*vj)*4; 1082 } 1083 xp = x + k*4; 1084 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 1085 } 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering" 1091 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1092 { 1093 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1094 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1095 MatScalar *aa=a->a; 1096 PetscScalar *x,*b; 1097 PetscErrorCode ierr; 1098 1099 PetscFunctionBegin; 1100 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1101 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1102 1103 /* solve U^T * D * y = b by forward substitution */ 1104 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1105 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1106 1107 /* solve U*x = y by back substitution */ 1108 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1109 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1110 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1111 ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering" 1117 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1118 { 1119 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1120 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1121 MatScalar *aa=a->a; 1122 PetscScalar *x,*b; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1127 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1128 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1129 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1130 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1131 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1132 ierr = PetscLogFlops(16*a->nz + mbs);CHKERRQ(ierr); 1133 PetscFunctionReturn(0); 1134 } 1135 1136 #undef __FUNCT__ 1137 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering" 1138 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1139 { 1140 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1141 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1142 MatScalar *aa=a->a; 1143 PetscScalar *x,*b; 1144 PetscErrorCode ierr; 1145 1146 PetscFunctionBegin; 1147 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1148 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1149 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1150 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1151 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1152 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1153 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1154 PetscFunctionReturn(0); 1155 } 1156 1157 #undef __FUNCT__ 1158 #define __FUNCT__ "MatSolve_SeqSBAIJ_3" 1159 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 1160 { 1161 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1162 IS isrow=a->row; 1163 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1164 PetscErrorCode ierr; 1165 PetscInt nz,*vj,k,*r,idx; 1166 MatScalar *aa=a->a,*v,*diag; 1167 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 1168 1169 PetscFunctionBegin; 1170 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1171 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1172 t = a->solve_work; 1173 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1174 1175 /* solve U^T * D * y = b by forward substitution */ 1176 tp = t; 1177 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1178 idx = 3*r[k]; 1179 tp[0] = b[idx]; 1180 tp[1] = b[idx+1]; 1181 tp[2] = b[idx+2]; 1182 tp += 3; 1183 } 1184 1185 for (k=0; k<mbs; k++){ 1186 v = aa + 9*ai[k]; 1187 vj = aj + ai[k]; 1188 tp = t + k*3; 1189 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 1190 nz = ai[k+1] - ai[k]; 1191 1192 tp = t + (*vj)*3; 1193 while (nz--) { 1194 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1195 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1196 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1197 vj++; tp = t + (*vj)*3; 1198 v += 9; 1199 } 1200 1201 /* xk = inv(Dk)*(Dk*xk) */ 1202 diag = aa+k*9; /* ptr to inv(Dk) */ 1203 tp = t + k*3; 1204 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1205 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1206 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1207 } 1208 1209 /* solve U*x = y by back substitution */ 1210 for (k=mbs-1; k>=0; k--){ 1211 v = aa + 9*ai[k]; 1212 vj = aj + ai[k]; 1213 tp = t + k*3; 1214 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 1215 nz = ai[k+1] - ai[k]; 1216 1217 tp = t + (*vj)*3; 1218 while (nz--) { 1219 /* xk += U(k,:)*x(:) */ 1220 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 1221 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 1222 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 1223 vj++; tp = t + (*vj)*3; 1224 v += 9; 1225 } 1226 tp = t + k*3; 1227 tp[0] = x0; tp[1] = x1; tp[2] = x2; 1228 idx = 3*r[k]; 1229 x[idx] = x0; 1230 x[idx+1] = x1; 1231 x[idx+2] = x2; 1232 } 1233 1234 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1235 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1236 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1237 ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr); 1238 PetscFunctionReturn(0); 1239 } 1240 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1243 PetscErrorCode ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1244 { 1245 MatScalar *v,*diag; 1246 PetscScalar *xp,x0,x1,x2; 1247 PetscInt nz,*vj,k; 1248 1249 PetscFunctionBegin; 1250 for (k=0; k<mbs; k++){ 1251 v = aa + 9*ai[k]; 1252 xp = x + k*3; 1253 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 1254 nz = ai[k+1] - ai[k]; 1255 vj = aj + ai[k]; 1256 xp = x + (*vj)*3; 1257 while (nz--) { 1258 /* x(:) += U(k,:)^T*(Dk*xk) */ 1259 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1260 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1261 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1262 vj++; xp = x + (*vj)*3; 1263 v += 9; 1264 } 1265 /* xk = inv(Dk)*(Dk*xk) */ 1266 diag = aa+k*9; /* ptr to inv(Dk) */ 1267 xp = x + k*3; 1268 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1269 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1270 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1271 } 1272 PetscFunctionReturn(0); 1273 } 1274 1275 #undef __FUNCT__ 1276 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1277 PetscErrorCode BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1278 { 1279 MatScalar *v; 1280 PetscScalar *xp,x0,x1,x2; 1281 PetscInt nz,*vj,k; 1282 1283 PetscFunctionBegin; 1284 for (k=mbs-1; k>=0; k--){ 1285 v = aa + 9*ai[k]; 1286 xp = x + k*3; 1287 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 1288 nz = ai[k+1] - ai[k]; 1289 vj = aj + ai[k]; 1290 xp = x + (*vj)*3; 1291 while (nz--) { 1292 /* xk += U(k,:)*x(:) */ 1293 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 1294 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 1295 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 1296 vj++; 1297 v += 9; xp = x + (*vj)*3; 1298 } 1299 xp = x + k*3; 1300 xp[0] = x0; xp[1] = x1; xp[2] = x2; 1301 } 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering" 1307 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1308 { 1309 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1310 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1311 MatScalar *aa=a->a; 1312 PetscScalar *x,*b; 1313 PetscErrorCode ierr; 1314 1315 PetscFunctionBegin; 1316 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1317 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1318 1319 /* solve U^T * D * y = b by forward substitution */ 1320 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1321 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1322 1323 /* solve U*x = y by back substitution */ 1324 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1325 1326 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1327 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1328 ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr); 1329 PetscFunctionReturn(0); 1330 } 1331 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering" 1334 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1335 { 1336 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1337 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1338 MatScalar *aa=a->a; 1339 PetscScalar *x,*b; 1340 PetscErrorCode ierr; 1341 1342 PetscFunctionBegin; 1343 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1344 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1345 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1346 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1347 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1348 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1349 ierr = PetscLogFlops(9*(a->nz + mbs));CHKERRQ(ierr); 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering" 1355 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1356 { 1357 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1358 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1359 MatScalar *aa=a->a; 1360 PetscScalar *x,*b; 1361 PetscErrorCode ierr; 1362 1363 PetscFunctionBegin; 1364 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1365 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1366 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1367 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1368 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1369 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1370 ierr = PetscLogFlops(9*a->nz);CHKERRQ(ierr); 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatSolve_SeqSBAIJ_2" 1376 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 1377 { 1378 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 1379 IS isrow=a->row; 1380 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1381 PetscErrorCode ierr; 1382 PetscInt nz,*vj,k,k2,*r,idx; 1383 MatScalar *aa=a->a,*v,*diag; 1384 PetscScalar *x,*b,x0,x1,*t; 1385 1386 PetscFunctionBegin; 1387 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1388 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1389 t = a->solve_work; 1390 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1391 1392 /* solve U^T * D * y = perm(b) by forward substitution */ 1393 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1394 idx = 2*r[k]; 1395 t[k*2] = b[idx]; 1396 t[k*2+1] = b[idx+1]; 1397 } 1398 for (k=0; k<mbs; k++){ 1399 v = aa + 4*ai[k]; 1400 vj = aj + ai[k]; 1401 k2 = k*2; 1402 x0 = t[k2]; x1 = t[k2+1]; 1403 nz = ai[k+1] - ai[k]; 1404 while (nz--) { 1405 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1406 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1407 vj++; v += 4; 1408 } 1409 diag = aa+k*4; /* ptr to inv(Dk) */ 1410 t[k2] = diag[0]*x0 + diag[2]*x1; 1411 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1412 } 1413 1414 /* solve U*x = y by back substitution */ 1415 for (k=mbs-1; k>=0; k--){ 1416 v = aa + 4*ai[k]; 1417 vj = aj + ai[k]; 1418 k2 = k*2; 1419 x0 = t[k2]; x1 = t[k2+1]; 1420 nz = ai[k+1] - ai[k]; 1421 while (nz--) { 1422 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1423 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1424 vj++; v += 4; 1425 } 1426 t[k2] = x0; 1427 t[k2+1] = x1; 1428 idx = 2*r[k]; 1429 x[idx] = x0; 1430 x[idx+1] = x1; 1431 } 1432 1433 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1434 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1435 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1436 ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #undef __FUNCT__ 1441 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1442 PetscErrorCode ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1443 { 1444 MatScalar *v,*diag; 1445 PetscScalar x0,x1; 1446 PetscInt nz,*vj,k,k2; 1447 1448 PetscFunctionBegin; 1449 for (k=0; k<mbs; k++){ 1450 v = aa + 4*ai[k]; 1451 vj = aj + ai[k]; 1452 k2 = k*2; 1453 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1454 nz = ai[k+1] - ai[k]; 1455 1456 while (nz--) { 1457 /* x(:) += U(k,:)^T*(Dk*xk) */ 1458 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1459 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1460 vj++; v += 4; 1461 } 1462 /* xk = inv(Dk)*(Dk*xk) */ 1463 diag = aa+k*4; /* ptr to inv(Dk) */ 1464 x[k2] = diag[0]*x0 + diag[2]*x1; 1465 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1466 } 1467 PetscFunctionReturn(0); 1468 } 1469 1470 #undef __FUNCT__ 1471 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1472 PetscErrorCode BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1473 { 1474 MatScalar *v; 1475 PetscScalar x0,x1; 1476 PetscInt nz,*vj,k,k2; 1477 1478 PetscFunctionBegin; 1479 for (k=mbs-1; k>=0; k--){ 1480 v = aa + 4*ai[k]; 1481 vj = aj + ai[k]; 1482 k2 = k*2; 1483 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1484 nz = ai[k+1] - ai[k]; 1485 while (nz--) { 1486 /* xk += U(k,:)*x(:) */ 1487 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1488 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1489 vj++; v += 4; 1490 } 1491 x[k2] = x0; 1492 x[k2+1] = x1; 1493 } 1494 PetscFunctionReturn(0); 1495 } 1496 1497 #undef __FUNCT__ 1498 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering" 1499 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1500 { 1501 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1502 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1503 MatScalar *aa=a->a; 1504 PetscScalar *x,*b; 1505 PetscErrorCode ierr; 1506 1507 PetscFunctionBegin; 1508 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1509 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1510 1511 /* solve U^T * D * y = b by forward substitution */ 1512 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1513 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1514 1515 /* solve U*x = y by back substitution */ 1516 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1517 1518 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1519 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1520 ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); /* bs2*(2*a->nz + mbs) */ 1521 PetscFunctionReturn(0); 1522 } 1523 1524 #undef __FUNCT__ 1525 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering" 1526 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1527 { 1528 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1529 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1530 MatScalar *aa=a->a; 1531 PetscScalar *x,*b; 1532 PetscErrorCode ierr; 1533 1534 PetscFunctionBegin; 1535 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1536 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1537 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1538 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1539 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1540 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1541 ierr = PetscLogFlops(4*(a->nz + mbs));CHKERRQ(ierr); 1542 PetscFunctionReturn(0); 1543 } 1544 1545 #undef __FUNCT__ 1546 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering" 1547 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1548 { 1549 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1550 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1551 MatScalar *aa=a->a; 1552 PetscScalar *x,*b; 1553 PetscErrorCode ierr; 1554 1555 PetscFunctionBegin; 1556 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1557 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1558 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1559 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1560 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1561 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1562 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 1563 PetscFunctionReturn(0); 1564 } 1565 1566 #undef __FUNCT__ 1567 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1568 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1569 { 1570 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1571 IS isrow=a->row; 1572 PetscErrorCode ierr; 1573 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rip; 1574 MatScalar *aa=a->a,*v; 1575 PetscScalar *x,*b,xk,*t; 1576 PetscInt nz,*vj,k; 1577 1578 PetscFunctionBegin; 1579 if (!mbs) PetscFunctionReturn(0); 1580 /* printf(" MatSolve_SeqSBAIJ_1 is called\n"); */ 1581 1582 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1583 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1584 t = a->solve_work; 1585 1586 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1587 1588 /* solve U^T*D*y = perm(b) by forward substitution */ 1589 for (k=0; k<mbs; k++) t[k] = b[rip[k]]; 1590 for (k=0; k<mbs; k++){ 1591 v = aa + ai[k] + 1; 1592 vj = aj + ai[k] + 1; 1593 xk = t[k]; 1594 nz = ai[k+1] - ai[k] - 1; 1595 while (nz--) t[*vj++] += (*v++) * xk; 1596 t[k] = xk*aa[ai[k]]; /* aa[k] = 1/D(k) */ 1597 } 1598 1599 /* solve U*x = y by back substitution */ 1600 for (k=mbs-1; k>=0; k--){ 1601 v = aa + ai[k] + 1; 1602 vj = aj + ai[k] + 1; 1603 xk = t[k]; 1604 nz = ai[k+1] - ai[k] - 1; 1605 while (nz--) xk += (*v++) * t[*vj++]; 1606 t[k] = xk; 1607 x[rip[k]] = xk; 1608 } 1609 1610 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1611 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1612 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1613 ierr = PetscLogFlops(4*a->nz + A->rmap.N);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 #undef __FUNCT__ 1618 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1619 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1620 { 1621 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1622 PetscErrorCode ierr; 1623 1624 PetscFunctionBegin; 1625 if (A->rmap.bs == 1) { 1626 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1627 } else { 1628 IS isrow=a->row; 1629 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i; 1630 MatScalar *aa=a->a,*v; 1631 PetscScalar *x,*b,*t; 1632 PetscInt nz,*vj,k,n; 1633 if (bb->n > a->solves_work_n) { 1634 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 1635 ierr = PetscMalloc(bb->n*A->rmap.N*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1636 a->solves_work_n = bb->n; 1637 } 1638 n = bb->n; 1639 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1640 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1641 t = a->solves_work; 1642 1643 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1644 1645 /* solve U^T*D*y = perm(b) by forward substitution */ 1646 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */ 1647 for (k=0; k<mbs; k++){ 1648 v = aa + ai[k]; 1649 vj = aj + ai[k]; 1650 nz = ai[k+1] - ai[k]; 1651 while (nz--) { 1652 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1653 v++;vj++; 1654 } 1655 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1656 } 1657 1658 /* solve U*x = y by back substitution */ 1659 for (k=mbs-1; k>=0; k--){ 1660 v = aa + ai[k]; 1661 vj = aj + ai[k]; 1662 nz = ai[k+1] - ai[k]; 1663 while (nz--) { 1664 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1665 v++;vj++; 1666 } 1667 for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i]; 1668 } 1669 1670 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1671 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1672 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1673 ierr = PetscLogFlops(bb->n*(4*a->nz + A->rmap.N));CHKERRQ(ierr); 1674 } 1675 PetscFunctionReturn(0); 1676 } 1677 1678 /* 1679 Special case where the matrix was ILU(0) factored in the natural 1680 ordering. This eliminates the need for the column and row permutation. 1681 */ 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1684 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1685 { 1686 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1687 PetscErrorCode ierr; 1688 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1689 MatScalar *aa=a->a,*v; 1690 PetscScalar *x,*b,xk; 1691 PetscInt nz,*vj,k; 1692 1693 PetscFunctionBegin; 1694 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1695 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1696 1697 /* solve U^T*D*y = b by forward substitution */ 1698 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1699 for (k=0; k<mbs; k++){ 1700 v = aa + ai[k] + 1; 1701 vj = aj + ai[k] + 1; 1702 xk = x[k]; 1703 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1704 while (nz--) x[*vj++] += (*v++) * xk; 1705 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1706 } 1707 1708 /* solve U*x = y by back substitution */ 1709 for (k=mbs-2; k>=0; k--){ 1710 v = aa + ai[k] + 1; 1711 vj = aj + ai[k] + 1; 1712 xk = x[k]; 1713 nz = ai[k+1] - ai[k] - 1; 1714 while (nz--) xk += (*v++) * x[*vj++]; 1715 x[k] = xk; 1716 } 1717 1718 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1719 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1720 ierr = PetscLogFlops(4*a->nz + A->rmap.N);CHKERRQ(ierr); 1721 PetscFunctionReturn(0); 1722 } 1723 1724 #undef __FUNCT__ 1725 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering" 1726 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1727 { 1728 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1729 PetscErrorCode ierr; 1730 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1731 MatScalar *aa=a->a,*v; 1732 PetscScalar *x,*b,xk; 1733 PetscInt nz,*vj,k; 1734 1735 PetscFunctionBegin; 1736 /* solve U^T*D*x = b by forward substitution */ 1737 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1738 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1739 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1740 for (k=0; k<mbs; k++){ 1741 v = aa + ai[k] + 1; 1742 vj = aj + ai[k] + 1; 1743 xk = x[k]; 1744 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1745 while (nz--) x[*vj++] += (*v++) * xk; 1746 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1747 } 1748 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1749 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1750 ierr = PetscLogFlops(2*a->nz + A->rmap.N);CHKERRQ(ierr); 1751 PetscFunctionReturn(0); 1752 } 1753 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering" 1756 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1757 { 1758 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1759 PetscErrorCode ierr; 1760 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1761 MatScalar *aa=a->a,*v; 1762 PetscScalar *x,*b,xk; 1763 PetscInt nz,*vj,k; 1764 1765 PetscFunctionBegin; 1766 /* solve U*x = b by back substitution */ 1767 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1768 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1769 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1770 for (k=mbs-2; k>=0; k--){ 1771 v = aa + ai[k] + 1; 1772 vj = aj + ai[k] + 1; 1773 xk = x[k]; 1774 nz = ai[k+1] - ai[k] - 1; 1775 while (nz--) xk += (*v++) * x[*vj++]; 1776 x[k] = xk; 1777 } 1778 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1779 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1780 ierr = PetscLogFlops(2*a->nz + A->rmap.N);CHKERRQ(ierr); 1781 PetscFunctionReturn(0); 1782 } 1783 1784 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 1785 #undef __FUNCT__ 1786 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR" 1787 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B) 1788 { 1789 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 1790 PetscErrorCode ierr; 1791 PetscInt *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j; 1792 PetscInt *jutmp,bs = A->rmap.bs,bs2=a->bs2; 1793 PetscInt m,reallocs = 0,*levtmp; 1794 PetscInt *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 1795 PetscInt incrlev,*lev,shift,prow,nz; 1796 PetscReal f = info->fill,levels = info->levels; 1797 PetscTruth perm_identity; 1798 1799 PetscFunctionBegin; 1800 /* check whether perm is the identity mapping */ 1801 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1802 1803 if (perm_identity){ 1804 a->permute = PETSC_FALSE; 1805 ai = a->i; aj = a->j; 1806 } else { /* non-trivial permutation */ 1807 a->permute = PETSC_TRUE; 1808 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 1809 ai = a->inew; aj = a->jnew; 1810 } 1811 1812 /* initialization */ 1813 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1814 umax = (PetscInt)(f*ai[mbs] + 1); 1815 ierr = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr); 1816 umax += mbs + 1; 1817 shift = mbs + 1; 1818 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 1819 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 1820 iu[0] = mbs + 1; 1821 juidx = mbs + 1; 1822 /* prowl: linked list for pivot row */ 1823 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr); 1824 /* q: linked list for col index */ 1825 q = prowl + mbs; 1826 levtmp = q + mbs; 1827 1828 for (i=0; i<mbs; i++){ 1829 prowl[i] = mbs; 1830 q[i] = 0; 1831 } 1832 1833 /* for each row k */ 1834 for (k=0; k<mbs; k++){ 1835 nzk = 0; 1836 q[k] = mbs; 1837 /* copy current row into linked list */ 1838 nz = ai[rip[k]+1] - ai[rip[k]]; 1839 j = ai[rip[k]]; 1840 while (nz--){ 1841 vj = rip[aj[j++]]; 1842 if (vj > k){ 1843 qm = k; 1844 do { 1845 m = qm; qm = q[m]; 1846 } while(qm < vj); 1847 if (qm == vj) { 1848 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 1849 } 1850 nzk++; 1851 q[m] = vj; 1852 q[vj] = qm; 1853 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1854 } 1855 } 1856 1857 /* modify nonzero structure of k-th row by computing fill-in 1858 for each row prow to be merged in */ 1859 prow = k; 1860 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 1861 1862 while (prow < k){ 1863 /* merge row prow into k-th row */ 1864 jmin = iu[prow] + 1; 1865 jmax = iu[prow+1]; 1866 qm = k; 1867 for (j=jmin; j<jmax; j++){ 1868 incrlev = lev[j-shift] + 1; 1869 if (incrlev > levels) continue; 1870 vj = ju[j]; 1871 do { 1872 m = qm; qm = q[m]; 1873 } while (qm < vj); 1874 if (qm != vj){ /* a fill */ 1875 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1876 levtmp[vj] = incrlev; 1877 } else { 1878 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1879 } 1880 } 1881 prow = prowl[prow]; /* next pivot row */ 1882 } 1883 1884 /* add k to row list for first nonzero element in k-th row */ 1885 if (nzk > 1){ 1886 i = q[k]; /* col value of first nonzero element in k_th row of U */ 1887 prowl[k] = prowl[i]; prowl[i] = k; 1888 } 1889 iu[k+1] = iu[k] + nzk; 1890 1891 /* allocate more space to ju and lev if needed */ 1892 if (iu[k+1] > umax) { 1893 /* estimate how much additional space we will need */ 1894 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1895 /* just double the memory each time */ 1896 maxadd = umax; 1897 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 1898 umax += maxadd; 1899 1900 /* allocate a longer ju */ 1901 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 1902 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 1903 ierr = PetscFree(ju);CHKERRQ(ierr); 1904 ju = jutmp; 1905 1906 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 1907 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr); 1908 ierr = PetscFree(lev);CHKERRQ(ierr); 1909 lev = jutmp; 1910 reallocs += 2; /* count how many times we realloc */ 1911 } 1912 1913 /* save nonzero structure of k-th row in ju */ 1914 i=k; 1915 while (nzk --) { 1916 i = q[i]; 1917 ju[juidx] = i; 1918 lev[juidx-shift] = levtmp[i]; 1919 juidx++; 1920 } 1921 } 1922 1923 #if defined(PETSC_USE_INFO) 1924 if (ai[mbs] != 0) { 1925 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1926 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1927 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1928 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 1929 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1930 } else { 1931 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1932 } 1933 #endif 1934 1935 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1936 ierr = PetscFree(prowl);CHKERRQ(ierr); 1937 ierr = PetscFree(lev);CHKERRQ(ierr); 1938 1939 /* put together the new matrix */ 1940 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 1941 ierr = MatSetSizes(*B,bs*mbs,bs*mbs,bs*mbs,bs*mbs);CHKERRQ(ierr); 1942 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 1943 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 1944 1945 /* ierr = PetscLogObjectParent(*B,iperm);CHKERRQ(ierr); */ 1946 b = (Mat_SeqSBAIJ*)(*B)->data; 1947 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 1948 b->singlemalloc = PETSC_FALSE; 1949 b->free_a = PETSC_TRUE; 1950 b->free_ij = PETSC_TRUE; 1951 /* the next line frees the default space generated by the Create() */ 1952 ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr); 1953 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 1954 b->j = ju; 1955 b->i = iu; 1956 b->diag = 0; 1957 b->ilen = 0; 1958 b->imax = 0; 1959 1960 if (b->row) { 1961 ierr = ISDestroy(b->row);CHKERRQ(ierr); 1962 } 1963 if (b->icol) { 1964 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 1965 } 1966 b->row = perm; 1967 b->icol = perm; 1968 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1969 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1970 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1971 /* In b structure: Free imax, ilen, old a, old j. 1972 Allocate idnew, solve_work, new a, new j */ 1973 ierr = PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1974 b->maxnz = b->nz = iu[mbs]; 1975 1976 (*B)->factor = FACTOR_CHOLESKY; 1977 (*B)->info.factor_mallocs = reallocs; 1978 (*B)->info.fill_ratio_given = f; 1979 if (ai[mbs] != 0) { 1980 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1981 } else { 1982 (*B)->info.fill_ratio_needed = 0.0; 1983 } 1984 1985 if (perm_identity){ 1986 switch (bs) { 1987 case 1: 1988 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1989 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1990 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1991 (*B)->ops->solves = MatSolves_SeqSBAIJ_1; 1992 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");CHKERRQ(ierr); 1993 break; 1994 case 2: 1995 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1996 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1997 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1998 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 1999 break; 2000 case 3: 2001 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 2002 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 2003 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 2004 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 2005 break; 2006 case 4: 2007 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 2008 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 2009 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 2010 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 2011 break; 2012 case 5: 2013 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 2014 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 2015 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 2016 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 2017 break; 2018 case 6: 2019 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 2020 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 2021 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 2022 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 2023 break; 2024 case 7: 2025 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 2026 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 2027 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 2028 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 2029 break; 2030 default: 2031 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 2032 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 2033 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 2034 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");CHKERRQ(ierr); 2035 break; 2036 } 2037 } 2038 2039 PetscFunctionReturn(0); 2040 } 2041 2042 #include "petscbt.h" 2043 #include "src/mat/utils/freespace.h" 2044 #undef __FUNCT__ 2045 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 2046 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 2047 { 2048 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2049 Mat_SeqSBAIJ *b; 2050 Mat B; 2051 PetscErrorCode ierr; 2052 PetscTruth perm_identity,free_ij = PETSC_TRUE; 2053 PetscInt bs=A->rmap.bs,am=a->mbs; 2054 PetscInt reallocs=0,*rip,i,*ai,*aj,*ui; 2055 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2056 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 2057 PetscReal fill=info->fill,levels=info->levels,ratio_needed; 2058 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2059 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2060 PetscBT lnkbt; 2061 PetscScalar *ua; 2062 2063 PetscFunctionBegin; 2064 /* 2065 This code originally uses Modified Sparse Row (MSR) storage 2066 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice! 2067 Then it is rewritten so the factor B takes seqsbaij format. However the associated 2068 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1, 2069 thus the original code in MSR format is still used for these cases. 2070 The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever 2071 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 2072 */ 2073 if (bs > 1){ 2074 ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);CHKERRQ(ierr); 2075 PetscFunctionReturn(0); 2076 } 2077 2078 /* check whether perm is the identity mapping */ 2079 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2080 2081 /* special case that simply copies fill pattern */ 2082 if (!levels && perm_identity) { 2083 a->permute = PETSC_FALSE; 2084 /* reuse the column pointers and row offsets for memory savings */ 2085 ui = a->i; 2086 uj = a->j; 2087 free_ij = PETSC_FALSE; 2088 ratio_needed = 1.0; 2089 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2090 if (perm_identity){ 2091 a->permute = PETSC_FALSE; 2092 ai = a->i; aj = a->j; 2093 } else { /* non-trivial permutation */ 2094 a->permute = PETSC_TRUE; 2095 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 2096 ai = a->inew; aj = a->jnew; 2097 } 2098 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2099 2100 /* initialization */ 2101 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2102 ui[0] = 0; 2103 2104 /* jl: linked list for storing indices of the pivot rows 2105 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2106 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 2107 il = jl + am; 2108 uj_ptr = (PetscInt**)(il + am); 2109 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 2110 for (i=0; i<am; i++){ 2111 jl[i] = am; il[i] = 0; 2112 } 2113 2114 /* create and initialize a linked list for storing column indices of the active row k */ 2115 nlnk = am + 1; 2116 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2117 2118 /* initial FreeSpace size is fill*(ai[am]+1) */ 2119 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2120 current_space = free_space; 2121 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2122 current_space_lvl = free_space_lvl; 2123 2124 for (k=0; k<am; k++){ /* for each active row k */ 2125 /* initialize lnk by the column indices of row rip[k] */ 2126 nzk = 0; 2127 ncols = ai[rip[k]+1] - ai[rip[k]]; 2128 cols = aj+ai[rip[k]]; 2129 ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2130 nzk += nlnk; 2131 2132 /* update lnk by computing fill-in for each pivot row to be merged in */ 2133 prow = jl[k]; /* 1st pivot row */ 2134 2135 while (prow < k){ 2136 nextprow = jl[prow]; 2137 2138 /* merge prow into k-th row */ 2139 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2140 jmax = ui[prow+1]; 2141 ncols = jmax-jmin; 2142 i = jmin - ui[prow]; 2143 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2144 j = *(uj_lvl_ptr[prow] + i - 1); 2145 cols_lvl = uj_lvl_ptr[prow]+i; 2146 ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2147 nzk += nlnk; 2148 2149 /* update il and jl for prow */ 2150 if (jmin < jmax){ 2151 il[prow] = jmin; 2152 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2153 } 2154 prow = nextprow; 2155 } 2156 2157 /* if free space is not available, make more free space */ 2158 if (current_space->local_remaining<nzk) { 2159 i = am - k + 1; /* num of unfactored rows */ 2160 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2161 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2162 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2163 reallocs++; 2164 } 2165 2166 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2167 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2168 2169 /* add the k-th row into il and jl */ 2170 if (nzk-1 > 0){ 2171 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2172 jl[k] = jl[i]; jl[i] = k; 2173 il[k] = ui[k] + 1; 2174 } 2175 uj_ptr[k] = current_space->array; 2176 uj_lvl_ptr[k] = current_space_lvl->array; 2177 2178 current_space->array += nzk; 2179 current_space->local_used += nzk; 2180 current_space->local_remaining -= nzk; 2181 current_space_lvl->array += nzk; 2182 current_space_lvl->local_used += nzk; 2183 current_space_lvl->local_remaining -= nzk; 2184 2185 ui[k+1] = ui[k] + nzk; 2186 } 2187 2188 #if defined(PETSC_USE_INFO) 2189 if (ai[am] != 0) { 2190 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2191 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2192 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2193 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2194 } else { 2195 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2196 } 2197 #endif 2198 2199 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2200 ierr = PetscFree(jl);CHKERRQ(ierr); 2201 2202 /* destroy list of free space and other temporary array(s) */ 2203 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2204 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2205 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2206 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2207 if (ai[am] != 0) { 2208 ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2209 } else { 2210 ratio_needed = 0.0; 2211 } 2212 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2213 2214 /* put together the new matrix in MATSEQSBAIJ format */ 2215 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&ua);CHKERRQ(ierr); 2216 ierr = MatCreateSeqSBAIJWithArrays(PETSC_COMM_SELF,1,am,am,ui,uj,ua,fact);CHKERRQ(ierr); 2217 B = *fact; 2218 b = (Mat_SeqSBAIJ*)B->data; 2219 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2220 b->singlemalloc = PETSC_FALSE; 2221 b->free_a = PETSC_TRUE; 2222 b->free_ij = free_ij; 2223 b->diag = 0; 2224 b->ilen = 0; 2225 b->imax = 0; 2226 b->row = perm; 2227 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2228 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2229 b->icol = perm; 2230 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2231 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2232 b->maxnz = b->nz = ui[am]; 2233 2234 B->factor = FACTOR_CHOLESKY; 2235 B->info.factor_mallocs = reallocs; 2236 B->info.fill_ratio_given = fill; 2237 B->info.fill_ratio_needed = ratio_needed; 2238 2239 if (perm_identity){ 2240 switch (bs) { 2241 case 1: 2242 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 2243 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2244 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2245 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");CHKERRQ(ierr); 2246 break; 2247 case 2: 2248 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 2249 B->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 2250 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 2251 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 2252 break; 2253 case 3: 2254 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 2255 B->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 2256 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 2257 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 2258 break; 2259 case 4: 2260 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 2261 B->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 2262 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 2263 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 2264 break; 2265 case 5: 2266 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 2267 B->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 2268 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 2269 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 2270 break; 2271 case 6: 2272 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 2273 B->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 2274 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 2275 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 2276 break; 2277 case 7: 2278 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 2279 B->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 2280 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 2281 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 2282 break; 2283 default: 2284 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 2285 B->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 2286 B->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 2287 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");CHKERRQ(ierr); 2288 break; 2289 } 2290 } 2291 PetscFunctionReturn(0); 2292 } 2293 2294