1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for SBAIJ format. 5 */ 6 7 #include "src/mat/impls/sbaij/seq/sbaij.h" 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/inline/ilu.h" 10 #include "src/inline/dot.h" 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatSolve_SeqSBAIJ_N" 14 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 15 { 16 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 17 IS isrow=a->row; 18 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 19 PetscErrorCode ierr; 20 PetscInt nz,*vj,k,*r,idx,k1; 21 PetscInt bs=A->rmap.bs,bs2 = a->bs2; 22 MatScalar *aa=a->a,*v,*diag; 23 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 24 25 PetscFunctionBegin; 26 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 27 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 28 t = a->solve_work; 29 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 30 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 31 32 /* solve U^T * D * y = b by forward substitution */ 33 xk = t; 34 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 35 idx = bs*r[k]; 36 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 37 } 38 for (k=0; k<mbs; k++){ 39 v = aa + bs2*ai[k]; 40 xk = t + k*bs; /* Dk*xk = k-th block of x */ 41 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 42 nz = ai[k+1] - ai[k]; 43 vj = aj + ai[k]; 44 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 45 while (nz--) { 46 /* x(:) += U(k,:)^T*(Dk*xk) */ 47 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 48 vj++; xj = t + (*vj)*bs; 49 v += bs2; 50 } 51 /* xk = inv(Dk)*(Dk*xk) */ 52 diag = aa+k*bs2; /* ptr to inv(Dk) */ 53 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 54 } 55 56 /* solve U*x = y by back substitution */ 57 for (k=mbs-1; k>=0; k--){ 58 v = aa + bs2*ai[k]; 59 xk = t + k*bs; /* xk */ 60 nz = ai[k+1] - ai[k]; 61 vj = aj + ai[k]; 62 xj = t + (*vj)*bs; 63 while (nz--) { 64 /* xk += U(k,:)*x(:) */ 65 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 66 vj++; 67 v += bs2; xj = t + (*vj)*bs; 68 } 69 idx = bs*r[k]; 70 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 71 } 72 73 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 74 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 75 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 76 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 77 ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 83 PetscErrorCode ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 84 { 85 PetscErrorCode ierr; 86 PetscInt nz,*vj,k; 87 PetscInt bs2 = bs*bs; 88 MatScalar *v,*diag; 89 PetscScalar *xk,*xj,*xk_tmp; 90 91 PetscFunctionBegin; 92 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 93 for (k=0; k<mbs; k++){ 94 v = aa + bs2*ai[k]; 95 xk = x + k*bs; /* Dk*xk = k-th block of x */ 96 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 97 nz = ai[k+1] - ai[k]; 98 vj = aj + ai[k]; 99 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 100 while (nz--) { 101 /* x(:) += U(k,:)^T*(Dk*xk) */ 102 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 103 vj++; xj = x + (*vj)*bs; 104 v += bs2; 105 } 106 /* xk = inv(Dk)*(Dk*xk) */ 107 diag = aa+k*bs2; /* ptr to inv(Dk) */ 108 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 109 } 110 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 111 PetscFunctionReturn(0); 112 } 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 116 PetscErrorCode BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 117 { 118 PetscErrorCode ierr; 119 PetscInt nz,*vj,k; 120 PetscInt bs2 = bs*bs; 121 MatScalar *v,*diag; 122 PetscScalar *xk,*xj; 123 124 PetscFunctionBegin; 125 for (k=mbs-1; k>=0; k--){ 126 v = aa + bs2*ai[k]; 127 xk = x + k*bs; /* xk */ 128 nz = ai[k+1] - ai[k]; 129 vj = aj + ai[k]; 130 xj = x + (*vj)*bs; 131 while (nz--) { 132 /* xk += U(k,:)*x(:) */ 133 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 134 vj++; 135 v += bs2; xj = x + (*vj)*bs; 136 } 137 } 138 PetscFunctionReturn(0); 139 } 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering" 143 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 144 { 145 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 146 PetscErrorCode ierr; 147 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 148 PetscInt bs=A->rmap.bs,bs2 = a->bs2; 149 MatScalar *aa=a->a; 150 PetscScalar *x,*b; 151 152 PetscFunctionBegin; 153 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 154 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 155 156 /* solve U^T * D * y = b by forward substitution */ 157 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 158 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 159 160 /* solve U*x = y by back substitution */ 161 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 162 163 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 164 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 165 ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering" 171 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 172 { 173 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 174 PetscErrorCode ierr; 175 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 176 PetscInt bs=A->rmap.bs,bs2 = a->bs2; 177 MatScalar *aa=a->a; 178 PetscScalar *x,*b; 179 180 PetscFunctionBegin; 181 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 182 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 183 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 184 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 185 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 186 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 187 ierr = PetscLogFlops(bs2*a->nz + A->rmap.N);CHKERRQ(ierr); 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering" 193 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 194 { 195 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 196 PetscErrorCode ierr; 197 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 198 PetscInt bs=A->rmap.bs,bs2 = a->bs2; 199 MatScalar *aa=a->a; 200 PetscScalar *x,*b; 201 202 PetscFunctionBegin; 203 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 204 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 205 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 206 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 207 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 208 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 209 ierr = PetscLogFlops(bs2*a->nz);CHKERRQ(ierr); 210 PetscFunctionReturn(0); 211 } 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatSolve_SeqSBAIJ_7" 215 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 216 { 217 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 218 IS isrow=a->row; 219 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 220 PetscErrorCode ierr; 221 PetscInt nz,*vj,k,*r,idx; 222 MatScalar *aa=a->a,*v,*d; 223 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 224 225 PetscFunctionBegin; 226 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 227 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 228 t = a->solve_work; 229 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 230 231 /* solve U^T * D * y = b by forward substitution */ 232 tp = t; 233 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 234 idx = 7*r[k]; 235 tp[0] = b[idx]; 236 tp[1] = b[idx+1]; 237 tp[2] = b[idx+2]; 238 tp[3] = b[idx+3]; 239 tp[4] = b[idx+4]; 240 tp[5] = b[idx+5]; 241 tp[6] = b[idx+6]; 242 tp += 7; 243 } 244 245 for (k=0; k<mbs; k++){ 246 v = aa + 49*ai[k]; 247 vj = aj + ai[k]; 248 tp = t + k*7; 249 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 250 nz = ai[k+1] - ai[k]; 251 tp = t + (*vj)*7; 252 while (nz--) { 253 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 254 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 255 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 256 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 257 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 258 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 259 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 260 vj++; tp = t + (*vj)*7; 261 v += 49; 262 } 263 264 /* xk = inv(Dk)*(Dk*xk) */ 265 d = aa+k*49; /* ptr to inv(Dk) */ 266 tp = t + k*7; 267 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 268 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 269 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 270 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 271 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 272 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 273 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 274 } 275 276 /* solve U*x = y by back substitution */ 277 for (k=mbs-1; k>=0; k--){ 278 v = aa + 49*ai[k]; 279 vj = aj + ai[k]; 280 tp = t + k*7; 281 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 282 nz = ai[k+1] - ai[k]; 283 284 tp = t + (*vj)*7; 285 while (nz--) { 286 /* xk += U(k,:)*x(:) */ 287 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 288 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 289 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 290 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 291 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 292 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 293 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 294 vj++; tp = t + (*vj)*7; 295 v += 49; 296 } 297 tp = t + k*7; 298 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 299 idx = 7*r[k]; 300 x[idx] = x0; 301 x[idx+1] = x1; 302 x[idx+2] = x2; 303 x[idx+3] = x3; 304 x[idx+4] = x4; 305 x[idx+5] = x5; 306 x[idx+6] = x6; 307 } 308 309 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 310 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 311 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 312 ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr); 313 PetscFunctionReturn(0); 314 } 315 316 #undef __FUNCT__ 317 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 318 PetscErrorCode ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 319 { 320 MatScalar *v,*d; 321 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 322 PetscInt nz,*vj,k; 323 324 PetscFunctionBegin; 325 for (k=0; k<mbs; k++){ 326 v = aa + 49*ai[k]; 327 xp = x + k*7; 328 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 329 nz = ai[k+1] - ai[k]; 330 vj = aj + ai[k]; 331 xp = x + (*vj)*7; 332 while (nz--) { 333 /* x(:) += U(k,:)^T*(Dk*xk) */ 334 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 335 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 336 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 337 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 338 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 339 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 340 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 341 vj++; xp = x + (*vj)*7; 342 v += 49; 343 } 344 /* xk = inv(Dk)*(Dk*xk) */ 345 d = aa+k*49; /* ptr to inv(Dk) */ 346 xp = x + k*7; 347 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 348 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 349 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 350 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 351 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 352 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 353 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 354 } 355 PetscFunctionReturn(0); 356 } 357 358 #undef __FUNCT__ 359 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 360 PetscErrorCode BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 361 { 362 MatScalar *v,*d; 363 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 364 PetscInt nz,*vj,k; 365 366 PetscFunctionBegin; 367 for (k=mbs-1; k>=0; k--){ 368 v = aa + 49*ai[k]; 369 xp = x + k*7; 370 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 371 nz = ai[k+1] - ai[k]; 372 vj = aj + ai[k]; 373 xp = x + (*vj)*7; 374 while (nz--) { 375 /* xk += U(k,:)*x(:) */ 376 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 377 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 378 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 379 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 380 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 381 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 382 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 383 vj++; 384 v += 49; xp = x + (*vj)*7; 385 } 386 xp = x + k*7; 387 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 388 } 389 PetscFunctionReturn(0); 390 } 391 392 #undef __FUNCT__ 393 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering" 394 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 395 { 396 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 397 PetscErrorCode ierr; 398 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 399 MatScalar *aa=a->a; 400 PetscScalar *x,*b; 401 402 PetscFunctionBegin; 403 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 404 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 405 406 /* solve U^T * D * y = b by forward substitution */ 407 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 408 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 409 410 /* solve U*x = y by back substitution */ 411 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 412 413 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 414 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 415 ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr); 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering" 421 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 422 { 423 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 424 PetscErrorCode ierr; 425 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 426 MatScalar *aa=a->a; 427 PetscScalar *x,*b; 428 429 PetscFunctionBegin; 430 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 431 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 432 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 433 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 434 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 435 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 436 ierr = PetscLogFlops(49*a->nz + mbs);CHKERRQ(ierr); 437 PetscFunctionReturn(0); 438 } 439 440 #undef __FUNCT__ 441 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering" 442 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 443 { 444 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 445 PetscErrorCode ierr; 446 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 447 MatScalar *aa=a->a; 448 PetscScalar *x,*b; 449 450 PetscFunctionBegin; 451 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 452 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 453 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 454 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 455 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 456 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 457 ierr = PetscLogFlops(49*a->nz);CHKERRQ(ierr); 458 PetscFunctionReturn(0); 459 } 460 461 #undef __FUNCT__ 462 #define __FUNCT__ "MatSolve_SeqSBAIJ_6" 463 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 464 { 465 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 466 IS isrow=a->row; 467 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 468 PetscErrorCode ierr; 469 PetscInt nz,*vj,k,*r,idx; 470 MatScalar *aa=a->a,*v,*d; 471 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 472 473 PetscFunctionBegin; 474 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 475 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 476 t = a->solve_work; 477 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 478 479 /* solve U^T * D * y = b by forward substitution */ 480 tp = t; 481 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 482 idx = 6*r[k]; 483 tp[0] = b[idx]; 484 tp[1] = b[idx+1]; 485 tp[2] = b[idx+2]; 486 tp[3] = b[idx+3]; 487 tp[4] = b[idx+4]; 488 tp[5] = b[idx+5]; 489 tp += 6; 490 } 491 492 for (k=0; k<mbs; k++){ 493 v = aa + 36*ai[k]; 494 vj = aj + ai[k]; 495 tp = t + k*6; 496 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 497 nz = ai[k+1] - ai[k]; 498 tp = t + (*vj)*6; 499 while (nz--) { 500 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 501 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 502 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 503 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 504 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 505 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 506 vj++; tp = t + (*vj)*6; 507 v += 36; 508 } 509 510 /* xk = inv(Dk)*(Dk*xk) */ 511 d = aa+k*36; /* ptr to inv(Dk) */ 512 tp = t + k*6; 513 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 514 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 515 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 516 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 517 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 518 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 519 } 520 521 /* solve U*x = y by back substitution */ 522 for (k=mbs-1; k>=0; k--){ 523 v = aa + 36*ai[k]; 524 vj = aj + ai[k]; 525 tp = t + k*6; 526 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 527 nz = ai[k+1] - ai[k]; 528 529 tp = t + (*vj)*6; 530 while (nz--) { 531 /* xk += U(k,:)*x(:) */ 532 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 533 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 534 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 535 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 536 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 537 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 538 vj++; tp = t + (*vj)*6; 539 v += 36; 540 } 541 tp = t + k*6; 542 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 543 idx = 6*r[k]; 544 x[idx] = x0; 545 x[idx+1] = x1; 546 x[idx+2] = x2; 547 x[idx+3] = x3; 548 x[idx+4] = x4; 549 x[idx+5] = x5; 550 } 551 552 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 553 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 554 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 555 ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 561 PetscErrorCode ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 562 { 563 MatScalar *v,*d; 564 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 565 PetscInt nz,*vj,k; 566 567 PetscFunctionBegin; 568 for (k=0; k<mbs; k++){ 569 v = aa + 36*ai[k]; 570 xp = x + k*6; 571 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 572 nz = ai[k+1] - ai[k]; 573 vj = aj + ai[k]; 574 xp = x + (*vj)*6; 575 while (nz--) { 576 /* x(:) += U(k,:)^T*(Dk*xk) */ 577 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 578 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 579 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 580 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 581 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 582 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 583 vj++; xp = x + (*vj)*6; 584 v += 36; 585 } 586 /* xk = inv(Dk)*(Dk*xk) */ 587 d = aa+k*36; /* ptr to inv(Dk) */ 588 xp = x + k*6; 589 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 590 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 591 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 592 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 593 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 594 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 595 } 596 PetscFunctionReturn(0); 597 } 598 #undef __FUNCT__ 599 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 600 PetscErrorCode BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 601 { 602 MatScalar *v,*d; 603 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 604 PetscInt nz,*vj,k; 605 606 PetscFunctionBegin; 607 for (k=mbs-1; k>=0; k--){ 608 v = aa + 36*ai[k]; 609 xp = x + k*6; 610 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 611 nz = ai[k+1] - ai[k]; 612 vj = aj + ai[k]; 613 xp = x + (*vj)*6; 614 while (nz--) { 615 /* xk += U(k,:)*x(:) */ 616 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 617 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 618 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 619 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 620 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 621 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 622 vj++; 623 v += 36; xp = x + (*vj)*6; 624 } 625 xp = x + k*6; 626 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 627 } 628 PetscFunctionReturn(0); 629 } 630 631 632 #undef __FUNCT__ 633 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering" 634 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 635 { 636 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 637 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 638 MatScalar *aa=a->a; 639 PetscScalar *x,*b; 640 PetscErrorCode ierr; 641 642 PetscFunctionBegin; 643 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 644 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 645 646 /* solve U^T * D * y = b by forward substitution */ 647 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 648 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 649 650 /* solve U*x = y by back substitution */ 651 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 652 653 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 654 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 655 ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr); 656 PetscFunctionReturn(0); 657 } 658 659 #undef __FUNCT__ 660 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering" 661 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 662 { 663 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 664 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 665 MatScalar *aa=a->a; 666 PetscScalar *x,*b; 667 PetscErrorCode ierr; 668 669 PetscFunctionBegin; 670 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 671 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 672 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 673 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 674 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 675 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 676 ierr = PetscLogFlops(36*a->nz + mbs);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 680 #undef __FUNCT__ 681 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering" 682 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 683 { 684 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 685 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 686 MatScalar *aa=a->a; 687 PetscScalar *x,*b; 688 PetscErrorCode ierr; 689 690 PetscFunctionBegin; 691 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 692 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 693 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 694 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 695 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 696 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 697 ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr); 698 PetscFunctionReturn(0); 699 } 700 701 #undef __FUNCT__ 702 #define __FUNCT__ "MatSolve_SeqSBAIJ_5" 703 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 704 { 705 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 706 IS isrow=a->row; 707 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 708 PetscErrorCode ierr; 709 PetscInt nz,*vj,k,*r,idx; 710 MatScalar *aa=a->a,*v,*diag; 711 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 712 713 PetscFunctionBegin; 714 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 715 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 716 t = a->solve_work; 717 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 718 719 /* solve U^T * D * y = b by forward substitution */ 720 tp = t; 721 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 722 idx = 5*r[k]; 723 tp[0] = b[idx]; 724 tp[1] = b[idx+1]; 725 tp[2] = b[idx+2]; 726 tp[3] = b[idx+3]; 727 tp[4] = b[idx+4]; 728 tp += 5; 729 } 730 731 for (k=0; k<mbs; k++){ 732 v = aa + 25*ai[k]; 733 vj = aj + ai[k]; 734 tp = t + k*5; 735 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 736 nz = ai[k+1] - ai[k]; 737 738 tp = t + (*vj)*5; 739 while (nz--) { 740 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 741 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 742 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 743 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 744 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 745 vj++; tp = t + (*vj)*5; 746 v += 25; 747 } 748 749 /* xk = inv(Dk)*(Dk*xk) */ 750 diag = aa+k*25; /* ptr to inv(Dk) */ 751 tp = t + k*5; 752 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 753 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 754 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 755 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 756 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 757 } 758 759 /* solve U*x = y by back substitution */ 760 for (k=mbs-1; k>=0; k--){ 761 v = aa + 25*ai[k]; 762 vj = aj + ai[k]; 763 tp = t + k*5; 764 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 765 nz = ai[k+1] - ai[k]; 766 767 tp = t + (*vj)*5; 768 while (nz--) { 769 /* xk += U(k,:)*x(:) */ 770 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 771 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 772 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 773 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 774 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 775 vj++; tp = t + (*vj)*5; 776 v += 25; 777 } 778 tp = t + k*5; 779 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 780 idx = 5*r[k]; 781 x[idx] = x0; 782 x[idx+1] = x1; 783 x[idx+2] = x2; 784 x[idx+3] = x3; 785 x[idx+4] = x4; 786 } 787 788 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 789 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 790 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 791 ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr); 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 797 PetscErrorCode ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 798 { 799 MatScalar *v,*diag; 800 PetscScalar *xp,x0,x1,x2,x3,x4; 801 PetscInt nz,*vj,k; 802 803 PetscFunctionBegin; 804 for (k=0; k<mbs; k++){ 805 v = aa + 25*ai[k]; 806 xp = x + k*5; 807 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 808 nz = ai[k+1] - ai[k]; 809 vj = aj + ai[k]; 810 xp = x + (*vj)*5; 811 while (nz--) { 812 /* x(:) += U(k,:)^T*(Dk*xk) */ 813 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 814 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 815 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 816 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 817 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 818 vj++; xp = x + (*vj)*5; 819 v += 25; 820 } 821 /* xk = inv(Dk)*(Dk*xk) */ 822 diag = aa+k*25; /* ptr to inv(Dk) */ 823 xp = x + k*5; 824 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 825 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 826 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 827 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 828 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 829 } 830 PetscFunctionReturn(0); 831 } 832 833 #undef __FUNCT__ 834 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 835 PetscErrorCode BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 836 { 837 MatScalar *v,*diag; 838 PetscScalar *xp,x0,x1,x2,x3,x4; 839 PetscInt nz,*vj,k; 840 841 PetscFunctionBegin; 842 for (k=mbs-1; k>=0; k--){ 843 v = aa + 25*ai[k]; 844 xp = x + k*5; 845 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 846 nz = ai[k+1] - ai[k]; 847 vj = aj + ai[k]; 848 xp = x + (*vj)*5; 849 while (nz--) { 850 /* xk += U(k,:)*x(:) */ 851 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 852 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 853 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 854 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 855 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 856 vj++; 857 v += 25; xp = x + (*vj)*5; 858 } 859 xp = x + k*5; 860 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 861 } 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering" 867 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 868 { 869 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 870 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 871 MatScalar *aa=a->a; 872 PetscScalar *x,*b; 873 PetscErrorCode ierr; 874 875 PetscFunctionBegin; 876 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 877 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 878 879 /* solve U^T * D * y = b by forward substitution */ 880 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 881 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 882 883 /* solve U*x = y by back substitution */ 884 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 885 886 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 887 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 888 ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr); 889 PetscFunctionReturn(0); 890 } 891 892 #undef __FUNCT__ 893 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering" 894 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 895 { 896 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 897 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 898 MatScalar *aa=a->a; 899 PetscScalar *x,*b; 900 PetscErrorCode ierr; 901 902 PetscFunctionBegin; 903 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 904 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 905 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 906 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 907 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 908 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 909 ierr = PetscLogFlops(25*(a->nz + mbs));CHKERRQ(ierr); 910 PetscFunctionReturn(0); 911 } 912 913 #undef __FUNCT__ 914 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering" 915 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 916 { 917 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 918 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 919 MatScalar *aa=a->a; 920 PetscScalar *x,*b; 921 PetscErrorCode ierr; 922 923 PetscFunctionBegin; 924 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 925 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 926 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 927 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 928 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 929 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 930 ierr = PetscLogFlops(25*a->nz);CHKERRQ(ierr); 931 PetscFunctionReturn(0); 932 } 933 934 #undef __FUNCT__ 935 #define __FUNCT__ "MatSolve_SeqSBAIJ_4" 936 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 937 { 938 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 939 IS isrow=a->row; 940 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 941 PetscErrorCode ierr; 942 PetscInt nz,*vj,k,*r,idx; 943 MatScalar *aa=a->a,*v,*diag; 944 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 945 946 PetscFunctionBegin; 947 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 948 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 949 t = a->solve_work; 950 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 951 952 /* solve U^T * D * y = b by forward substitution */ 953 tp = t; 954 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 955 idx = 4*r[k]; 956 tp[0] = b[idx]; 957 tp[1] = b[idx+1]; 958 tp[2] = b[idx+2]; 959 tp[3] = b[idx+3]; 960 tp += 4; 961 } 962 963 for (k=0; k<mbs; k++){ 964 v = aa + 16*ai[k]; 965 vj = aj + ai[k]; 966 tp = t + k*4; 967 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 968 nz = ai[k+1] - ai[k]; 969 970 tp = t + (*vj)*4; 971 while (nz--) { 972 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 973 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 974 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 975 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 976 vj++; tp = t + (*vj)*4; 977 v += 16; 978 } 979 980 /* xk = inv(Dk)*(Dk*xk) */ 981 diag = aa+k*16; /* ptr to inv(Dk) */ 982 tp = t + k*4; 983 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 984 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 985 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 986 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 987 } 988 989 /* solve U*x = y by back substitution */ 990 for (k=mbs-1; k>=0; k--){ 991 v = aa + 16*ai[k]; 992 vj = aj + ai[k]; 993 tp = t + k*4; 994 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 995 nz = ai[k+1] - ai[k]; 996 997 tp = t + (*vj)*4; 998 while (nz--) { 999 /* xk += U(k,:)*x(:) */ 1000 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 1001 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 1002 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 1003 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 1004 vj++; tp = t + (*vj)*4; 1005 v += 16; 1006 } 1007 tp = t + k*4; 1008 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 1009 idx = 4*r[k]; 1010 x[idx] = x0; 1011 x[idx+1] = x1; 1012 x[idx+2] = x2; 1013 x[idx+3] = x3; 1014 } 1015 1016 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1017 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1018 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1019 ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr); 1020 PetscFunctionReturn(0); 1021 } 1022 1023 #undef __FUNCT__ 1024 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1025 PetscErrorCode ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1026 { 1027 MatScalar *v,*diag; 1028 PetscScalar *xp,*b,x0,x1,x2,x3; 1029 PetscInt nz,*vj,k; 1030 1031 PetscFunctionBegin; 1032 for (k=0; k<mbs; k++){ 1033 v = aa + 16*ai[k]; 1034 xp = x + k*4; 1035 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 1036 nz = ai[k+1] - ai[k]; 1037 vj = aj + ai[k]; 1038 xp = x + (*vj)*4; 1039 while (nz--) { 1040 /* x(:) += U(k,:)^T*(Dk*xk) */ 1041 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 1042 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1043 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1044 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1045 vj++; xp = x + (*vj)*4; 1046 v += 16; 1047 } 1048 /* xk = inv(Dk)*(Dk*xk) */ 1049 diag = aa+k*16; /* ptr to inv(Dk) */ 1050 xp = x + k*4; 1051 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1052 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1053 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1054 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1055 } 1056 PetscFunctionReturn(0); 1057 } 1058 1059 #undef __FUNCT__ 1060 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1061 PetscErrorCode BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1062 { 1063 MatScalar *v,*diag; 1064 PetscScalar *xp,*b,x0,x1,x2,x3; 1065 PetscInt nz,*vj,k; 1066 1067 PetscFunctionBegin; 1068 for (k=mbs-1; k>=0; k--){ 1069 v = aa + 16*ai[k]; 1070 xp = x + k*4; 1071 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 1072 nz = ai[k+1] - ai[k]; 1073 vj = aj + ai[k]; 1074 xp = x + (*vj)*4; 1075 while (nz--) { 1076 /* xk += U(k,:)*x(:) */ 1077 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 1078 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 1079 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 1080 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 1081 vj++; 1082 v += 16; xp = x + (*vj)*4; 1083 } 1084 xp = x + k*4; 1085 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 1086 } 1087 PetscFunctionReturn(0); 1088 } 1089 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering" 1092 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1093 { 1094 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1095 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1096 MatScalar *aa=a->a; 1097 PetscScalar *x,*b; 1098 PetscErrorCode ierr; 1099 1100 PetscFunctionBegin; 1101 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1102 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1103 1104 /* solve U^T * D * y = b by forward substitution */ 1105 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1106 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1107 1108 /* solve U*x = y by back substitution */ 1109 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1110 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1111 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1112 ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering" 1118 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1119 { 1120 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1121 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1122 MatScalar *aa=a->a; 1123 PetscScalar *x,*b; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1128 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1129 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1130 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1131 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1132 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1133 ierr = PetscLogFlops(16*a->nz + mbs);CHKERRQ(ierr); 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNCT__ 1138 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering" 1139 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1140 { 1141 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1142 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1143 MatScalar *aa=a->a; 1144 PetscScalar *x,*b; 1145 PetscErrorCode ierr; 1146 1147 PetscFunctionBegin; 1148 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1149 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1150 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1151 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1152 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1153 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1154 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1155 PetscFunctionReturn(0); 1156 } 1157 1158 #undef __FUNCT__ 1159 #define __FUNCT__ "MatSolve_SeqSBAIJ_3" 1160 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 1161 { 1162 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1163 IS isrow=a->row; 1164 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1165 PetscErrorCode ierr; 1166 PetscInt nz,*vj,k,*r,idx; 1167 MatScalar *aa=a->a,*v,*diag; 1168 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 1169 1170 PetscFunctionBegin; 1171 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1172 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1173 t = a->solve_work; 1174 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1175 1176 /* solve U^T * D * y = b by forward substitution */ 1177 tp = t; 1178 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1179 idx = 3*r[k]; 1180 tp[0] = b[idx]; 1181 tp[1] = b[idx+1]; 1182 tp[2] = b[idx+2]; 1183 tp += 3; 1184 } 1185 1186 for (k=0; k<mbs; k++){ 1187 v = aa + 9*ai[k]; 1188 vj = aj + ai[k]; 1189 tp = t + k*3; 1190 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 1191 nz = ai[k+1] - ai[k]; 1192 1193 tp = t + (*vj)*3; 1194 while (nz--) { 1195 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1196 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1197 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1198 vj++; tp = t + (*vj)*3; 1199 v += 9; 1200 } 1201 1202 /* xk = inv(Dk)*(Dk*xk) */ 1203 diag = aa+k*9; /* ptr to inv(Dk) */ 1204 tp = t + k*3; 1205 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1206 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1207 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1208 } 1209 1210 /* solve U*x = y by back substitution */ 1211 for (k=mbs-1; k>=0; k--){ 1212 v = aa + 9*ai[k]; 1213 vj = aj + ai[k]; 1214 tp = t + k*3; 1215 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 1216 nz = ai[k+1] - ai[k]; 1217 1218 tp = t + (*vj)*3; 1219 while (nz--) { 1220 /* xk += U(k,:)*x(:) */ 1221 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 1222 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 1223 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 1224 vj++; tp = t + (*vj)*3; 1225 v += 9; 1226 } 1227 tp = t + k*3; 1228 tp[0] = x0; tp[1] = x1; tp[2] = x2; 1229 idx = 3*r[k]; 1230 x[idx] = x0; 1231 x[idx+1] = x1; 1232 x[idx+2] = x2; 1233 } 1234 1235 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1236 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1237 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1238 ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr); 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1244 PetscErrorCode ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1245 { 1246 MatScalar *v,*diag; 1247 PetscScalar *xp,*b,x0,x1,x2; 1248 PetscInt nz,*vj,k; 1249 1250 PetscFunctionBegin; 1251 for (k=0; k<mbs; k++){ 1252 v = aa + 9*ai[k]; 1253 xp = x + k*3; 1254 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 1255 nz = ai[k+1] - ai[k]; 1256 vj = aj + ai[k]; 1257 xp = x + (*vj)*3; 1258 while (nz--) { 1259 /* x(:) += U(k,:)^T*(Dk*xk) */ 1260 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1261 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1262 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1263 vj++; xp = x + (*vj)*3; 1264 v += 9; 1265 } 1266 /* xk = inv(Dk)*(Dk*xk) */ 1267 diag = aa+k*9; /* ptr to inv(Dk) */ 1268 xp = x + k*3; 1269 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1270 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1271 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1272 } 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1278 PetscErrorCode BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1279 { 1280 MatScalar *v,*diag; 1281 PetscScalar *xp,*b,x0,x1,x2; 1282 PetscInt nz,*vj,k; 1283 1284 PetscFunctionBegin; 1285 for (k=mbs-1; k>=0; k--){ 1286 v = aa + 9*ai[k]; 1287 xp = x + k*3; 1288 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 1289 nz = ai[k+1] - ai[k]; 1290 vj = aj + ai[k]; 1291 xp = x + (*vj)*3; 1292 while (nz--) { 1293 /* xk += U(k,:)*x(:) */ 1294 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 1295 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 1296 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 1297 vj++; 1298 v += 9; xp = x + (*vj)*3; 1299 } 1300 xp = x + k*3; 1301 xp[0] = x0; xp[1] = x1; xp[2] = x2; 1302 } 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering" 1308 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1309 { 1310 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1311 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1312 MatScalar *aa=a->a; 1313 PetscScalar *x,*b; 1314 PetscErrorCode ierr; 1315 1316 PetscFunctionBegin; 1317 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1318 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1319 1320 /* solve U^T * D * y = b by forward substitution */ 1321 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1322 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1323 1324 /* solve U*x = y by back substitution */ 1325 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1326 1327 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1328 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1329 ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr); 1330 PetscFunctionReturn(0); 1331 } 1332 1333 #undef __FUNCT__ 1334 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering" 1335 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1336 { 1337 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1338 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1339 MatScalar *aa=a->a; 1340 PetscScalar *x,*b; 1341 PetscErrorCode ierr; 1342 1343 PetscFunctionBegin; 1344 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1345 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1346 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1347 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1348 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1349 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1350 ierr = PetscLogFlops(9*(a->nz + mbs));CHKERRQ(ierr); 1351 PetscFunctionReturn(0); 1352 } 1353 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering" 1356 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1357 { 1358 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1359 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1360 MatScalar *aa=a->a; 1361 PetscScalar *x,*b; 1362 PetscErrorCode ierr; 1363 1364 PetscFunctionBegin; 1365 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1366 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1367 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1368 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1369 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1370 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1371 ierr = PetscLogFlops(9*a->nz);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatSolve_SeqSBAIJ_2" 1377 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 1378 { 1379 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 1380 IS isrow=a->row; 1381 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1382 PetscErrorCode ierr; 1383 PetscInt nz,*vj,k,k2,*r,idx; 1384 MatScalar *aa=a->a,*v,*diag; 1385 PetscScalar *x,*b,x0,x1,*t; 1386 1387 PetscFunctionBegin; 1388 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1389 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1390 t = a->solve_work; 1391 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1392 1393 /* solve U^T * D * y = perm(b) by forward substitution */ 1394 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1395 idx = 2*r[k]; 1396 t[k*2] = b[idx]; 1397 t[k*2+1] = b[idx+1]; 1398 } 1399 for (k=0; k<mbs; k++){ 1400 v = aa + 4*ai[k]; 1401 vj = aj + ai[k]; 1402 k2 = k*2; 1403 x0 = t[k2]; x1 = t[k2+1]; 1404 nz = ai[k+1] - ai[k]; 1405 while (nz--) { 1406 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1407 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1408 vj++; v += 4; 1409 } 1410 diag = aa+k*4; /* ptr to inv(Dk) */ 1411 t[k2] = diag[0]*x0 + diag[2]*x1; 1412 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1413 } 1414 1415 /* solve U*x = y by back substitution */ 1416 for (k=mbs-1; k>=0; k--){ 1417 v = aa + 4*ai[k]; 1418 vj = aj + ai[k]; 1419 k2 = k*2; 1420 x0 = t[k2]; x1 = t[k2+1]; 1421 nz = ai[k+1] - ai[k]; 1422 while (nz--) { 1423 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1424 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1425 vj++; v += 4; 1426 } 1427 t[k2] = x0; 1428 t[k2+1] = x1; 1429 idx = 2*r[k]; 1430 x[idx] = x0; 1431 x[idx+1] = x1; 1432 } 1433 1434 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1435 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1436 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1437 ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1443 PetscErrorCode ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1444 { 1445 MatScalar *v,*diag; 1446 PetscScalar x0,x1; 1447 PetscInt nz,*vj,k,k2; 1448 1449 PetscFunctionBegin; 1450 for (k=0; k<mbs; k++){ 1451 v = aa + 4*ai[k]; 1452 vj = aj + ai[k]; 1453 k2 = k*2; 1454 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1455 nz = ai[k+1] - ai[k]; 1456 1457 while (nz--) { 1458 /* x(:) += U(k,:)^T*(Dk*xk) */ 1459 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1460 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1461 vj++; v += 4; 1462 } 1463 /* xk = inv(Dk)*(Dk*xk) */ 1464 diag = aa+k*4; /* ptr to inv(Dk) */ 1465 x[k2] = diag[0]*x0 + diag[2]*x1; 1466 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1467 } 1468 PetscFunctionReturn(0); 1469 } 1470 1471 #undef __FUNCT__ 1472 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1473 PetscErrorCode BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1474 { 1475 MatScalar *v,*diag; 1476 PetscScalar x0,x1; 1477 PetscInt nz,*vj,k,k2; 1478 1479 PetscFunctionBegin; 1480 for (k=mbs-1; k>=0; k--){ 1481 v = aa + 4*ai[k]; 1482 vj = aj + ai[k]; 1483 k2 = k*2; 1484 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1485 nz = ai[k+1] - ai[k]; 1486 while (nz--) { 1487 /* xk += U(k,:)*x(:) */ 1488 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1489 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1490 vj++; v += 4; 1491 } 1492 x[k2] = x0; 1493 x[k2+1] = x1; 1494 } 1495 PetscFunctionReturn(0); 1496 } 1497 1498 #undef __FUNCT__ 1499 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering" 1500 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1501 { 1502 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1503 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1504 MatScalar *aa=a->a; 1505 PetscScalar *x,*b; 1506 PetscErrorCode ierr; 1507 1508 PetscFunctionBegin; 1509 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1510 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1511 1512 /* solve U^T * D * y = b by forward substitution */ 1513 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1514 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1515 1516 /* solve U*x = y by back substitution */ 1517 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1518 1519 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1520 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1521 ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); /* bs2*(2*a->nz + mbs) */ 1522 PetscFunctionReturn(0); 1523 } 1524 1525 #undef __FUNCT__ 1526 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering" 1527 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1528 { 1529 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1530 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1531 MatScalar *aa=a->a; 1532 PetscScalar *x,*b; 1533 PetscErrorCode ierr; 1534 1535 PetscFunctionBegin; 1536 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1537 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1538 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1539 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1540 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1541 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1542 ierr = PetscLogFlops(4*(a->nz + mbs));CHKERRQ(ierr); 1543 PetscFunctionReturn(0); 1544 } 1545 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering" 1548 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1549 { 1550 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1551 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1552 MatScalar *aa=a->a; 1553 PetscScalar *x,*b; 1554 PetscErrorCode ierr; 1555 1556 PetscFunctionBegin; 1557 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1558 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1559 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1560 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1561 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1562 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1563 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 1564 PetscFunctionReturn(0); 1565 } 1566 1567 #undef __FUNCT__ 1568 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1569 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1570 { 1571 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1572 IS isrow=a->row; 1573 PetscErrorCode ierr; 1574 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rip; 1575 MatScalar *aa=a->a,*v; 1576 PetscScalar *x,*b,xk,*t; 1577 PetscInt nz,*vj,k; 1578 1579 PetscFunctionBegin; 1580 if (!mbs) PetscFunctionReturn(0); 1581 /* printf(" MatSolve_SeqSBAIJ_1 is called\n"); */ 1582 1583 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1584 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1585 t = a->solve_work; 1586 1587 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1588 1589 /* solve U^T*D*y = perm(b) by forward substitution */ 1590 for (k=0; k<mbs; k++) t[k] = b[rip[k]]; 1591 for (k=0; k<mbs; k++){ 1592 v = aa + ai[k] + 1; 1593 vj = aj + ai[k] + 1; 1594 xk = t[k]; 1595 nz = ai[k+1] - ai[k] - 1; 1596 while (nz--) t[*vj++] += (*v++) * xk; 1597 t[k] = xk*aa[ai[k]]; /* aa[k] = 1/D(k) */ 1598 } 1599 1600 /* solve U*x = y by back substitution */ 1601 for (k=mbs-1; k>=0; k--){ 1602 v = aa + ai[k] + 1; 1603 vj = aj + ai[k] + 1; 1604 xk = t[k]; 1605 nz = ai[k+1] - ai[k] - 1; 1606 while (nz--) xk += (*v++) * t[*vj++]; 1607 t[k] = xk; 1608 x[rip[k]] = xk; 1609 } 1610 1611 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1612 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1613 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1614 ierr = PetscLogFlops(4*a->nz + A->rmap.N);CHKERRQ(ierr); 1615 PetscFunctionReturn(0); 1616 } 1617 1618 #undef __FUNCT__ 1619 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1620 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1621 { 1622 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1623 PetscErrorCode ierr; 1624 1625 PetscFunctionBegin; 1626 if (A->rmap.bs == 1) { 1627 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1628 } else { 1629 IS isrow=a->row; 1630 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i; 1631 MatScalar *aa=a->a,*v; 1632 PetscScalar *x,*b,*t; 1633 PetscInt nz,*vj,k,n; 1634 if (bb->n > a->solves_work_n) { 1635 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 1636 ierr = PetscMalloc(bb->n*A->rmap.N*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1637 a->solves_work_n = bb->n; 1638 } 1639 n = bb->n; 1640 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1641 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1642 t = a->solves_work; 1643 1644 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1645 1646 /* solve U^T*D*y = perm(b) by forward substitution */ 1647 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */ 1648 for (k=0; k<mbs; k++){ 1649 v = aa + ai[k]; 1650 vj = aj + ai[k]; 1651 nz = ai[k+1] - ai[k]; 1652 while (nz--) { 1653 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1654 v++;vj++; 1655 } 1656 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1657 } 1658 1659 /* solve U*x = y by back substitution */ 1660 for (k=mbs-1; k>=0; k--){ 1661 v = aa + ai[k]; 1662 vj = aj + ai[k]; 1663 nz = ai[k+1] - ai[k]; 1664 while (nz--) { 1665 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1666 v++;vj++; 1667 } 1668 for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i]; 1669 } 1670 1671 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1672 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1673 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1674 ierr = PetscLogFlops(bb->n*(4*a->nz + A->rmap.N));CHKERRQ(ierr); 1675 } 1676 PetscFunctionReturn(0); 1677 } 1678 1679 /* 1680 Special case where the matrix was ILU(0) factored in the natural 1681 ordering. This eliminates the need for the column and row permutation. 1682 */ 1683 #undef __FUNCT__ 1684 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1685 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1686 { 1687 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1688 PetscErrorCode ierr; 1689 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1690 MatScalar *aa=a->a,*v; 1691 PetscScalar *x,*b,xk; 1692 PetscInt nz,*vj,k; 1693 1694 PetscFunctionBegin; 1695 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1696 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1697 1698 /* solve U^T*D*y = b by forward substitution */ 1699 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1700 for (k=0; k<mbs; k++){ 1701 v = aa + ai[k] + 1; 1702 vj = aj + ai[k] + 1; 1703 xk = x[k]; 1704 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1705 while (nz--) x[*vj++] += (*v++) * xk; 1706 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1707 } 1708 1709 /* solve U*x = y by back substitution */ 1710 for (k=mbs-2; k>=0; k--){ 1711 v = aa + ai[k] + 1; 1712 vj = aj + ai[k] + 1; 1713 xk = x[k]; 1714 nz = ai[k+1] - ai[k] - 1; 1715 while (nz--) xk += (*v++) * x[*vj++]; 1716 x[k] = xk; 1717 } 1718 1719 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1720 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1721 ierr = PetscLogFlops(4*a->nz + A->rmap.N);CHKERRQ(ierr); 1722 PetscFunctionReturn(0); 1723 } 1724 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering" 1727 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1728 { 1729 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1730 PetscErrorCode ierr; 1731 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1732 MatScalar *aa=a->a,*v; 1733 PetscScalar *x,*b,xk; 1734 PetscInt nz,*vj,k; 1735 1736 PetscFunctionBegin; 1737 /* solve U^T*D*x = b by forward substitution */ 1738 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1739 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1740 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1741 for (k=0; k<mbs; k++){ 1742 v = aa + ai[k] + 1; 1743 vj = aj + ai[k] + 1; 1744 xk = x[k]; 1745 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1746 while (nz--) x[*vj++] += (*v++) * xk; 1747 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1748 } 1749 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1750 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1751 ierr = PetscLogFlops(2*a->nz + A->rmap.N);CHKERRQ(ierr); 1752 PetscFunctionReturn(0); 1753 } 1754 1755 #undef __FUNCT__ 1756 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering" 1757 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1758 { 1759 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1760 PetscErrorCode ierr; 1761 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1762 MatScalar *aa=a->a,*v; 1763 PetscScalar *x,*b,xk; 1764 PetscInt nz,*vj,k; 1765 1766 PetscFunctionBegin; 1767 /* solve U*x = b by back substitution */ 1768 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1769 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1770 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1771 for (k=mbs-2; k>=0; k--){ 1772 v = aa + ai[k] + 1; 1773 vj = aj + ai[k] + 1; 1774 xk = x[k]; 1775 nz = ai[k+1] - ai[k] - 1; 1776 while (nz--) xk += (*v++) * x[*vj++]; 1777 x[k] = xk; 1778 } 1779 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1780 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1781 ierr = PetscLogFlops(2*a->nz + A->rmap.N);CHKERRQ(ierr); 1782 PetscFunctionReturn(0); 1783 } 1784 1785 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 1786 #undef __FUNCT__ 1787 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR" 1788 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B) 1789 { 1790 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 1791 PetscErrorCode ierr; 1792 PetscInt *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j; 1793 PetscInt *jutmp,bs = A->rmap.bs,bs2=a->bs2; 1794 PetscInt m,reallocs = 0,*levtmp; 1795 PetscInt *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 1796 PetscInt incrlev,*lev,shift,prow,nz; 1797 PetscReal f = info->fill,levels = info->levels; 1798 PetscTruth perm_identity; 1799 1800 PetscFunctionBegin; 1801 /* check whether perm is the identity mapping */ 1802 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1803 1804 if (perm_identity){ 1805 a->permute = PETSC_FALSE; 1806 ai = a->i; aj = a->j; 1807 } else { /* non-trivial permutation */ 1808 a->permute = PETSC_TRUE; 1809 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 1810 ai = a->inew; aj = a->jnew; 1811 } 1812 1813 /* initialization */ 1814 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1815 umax = (PetscInt)(f*ai[mbs] + 1); 1816 ierr = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr); 1817 umax += mbs + 1; 1818 shift = mbs + 1; 1819 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 1820 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 1821 iu[0] = mbs + 1; 1822 juidx = mbs + 1; 1823 /* prowl: linked list for pivot row */ 1824 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr); 1825 /* q: linked list for col index */ 1826 q = prowl + mbs; 1827 levtmp = q + mbs; 1828 1829 for (i=0; i<mbs; i++){ 1830 prowl[i] = mbs; 1831 q[i] = 0; 1832 } 1833 1834 /* for each row k */ 1835 for (k=0; k<mbs; k++){ 1836 nzk = 0; 1837 q[k] = mbs; 1838 /* copy current row into linked list */ 1839 nz = ai[rip[k]+1] - ai[rip[k]]; 1840 j = ai[rip[k]]; 1841 while (nz--){ 1842 vj = rip[aj[j++]]; 1843 if (vj > k){ 1844 qm = k; 1845 do { 1846 m = qm; qm = q[m]; 1847 } while(qm < vj); 1848 if (qm == vj) { 1849 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 1850 } 1851 nzk++; 1852 q[m] = vj; 1853 q[vj] = qm; 1854 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1855 } 1856 } 1857 1858 /* modify nonzero structure of k-th row by computing fill-in 1859 for each row prow to be merged in */ 1860 prow = k; 1861 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 1862 1863 while (prow < k){ 1864 /* merge row prow into k-th row */ 1865 jmin = iu[prow] + 1; 1866 jmax = iu[prow+1]; 1867 qm = k; 1868 for (j=jmin; j<jmax; j++){ 1869 incrlev = lev[j-shift] + 1; 1870 if (incrlev > levels) continue; 1871 vj = ju[j]; 1872 do { 1873 m = qm; qm = q[m]; 1874 } while (qm < vj); 1875 if (qm != vj){ /* a fill */ 1876 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1877 levtmp[vj] = incrlev; 1878 } else { 1879 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1880 } 1881 } 1882 prow = prowl[prow]; /* next pivot row */ 1883 } 1884 1885 /* add k to row list for first nonzero element in k-th row */ 1886 if (nzk > 1){ 1887 i = q[k]; /* col value of first nonzero element in k_th row of U */ 1888 prowl[k] = prowl[i]; prowl[i] = k; 1889 } 1890 iu[k+1] = iu[k] + nzk; 1891 1892 /* allocate more space to ju and lev if needed */ 1893 if (iu[k+1] > umax) { 1894 /* estimate how much additional space we will need */ 1895 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1896 /* just double the memory each time */ 1897 maxadd = umax; 1898 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 1899 umax += maxadd; 1900 1901 /* allocate a longer ju */ 1902 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 1903 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 1904 ierr = PetscFree(ju);CHKERRQ(ierr); 1905 ju = jutmp; 1906 1907 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 1908 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr); 1909 ierr = PetscFree(lev);CHKERRQ(ierr); 1910 lev = jutmp; 1911 reallocs += 2; /* count how many times we realloc */ 1912 } 1913 1914 /* save nonzero structure of k-th row in ju */ 1915 i=k; 1916 while (nzk --) { 1917 i = q[i]; 1918 ju[juidx] = i; 1919 lev[juidx-shift] = levtmp[i]; 1920 juidx++; 1921 } 1922 } 1923 1924 #if defined(PETSC_USE_INFO) 1925 if (ai[mbs] != 0) { 1926 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1927 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1928 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1929 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 1930 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1931 } else { 1932 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1933 } 1934 #endif 1935 1936 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1937 ierr = PetscFree(prowl);CHKERRQ(ierr); 1938 ierr = PetscFree(lev);CHKERRQ(ierr); 1939 1940 /* put together the new matrix */ 1941 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 1942 ierr = MatSetSizes(*B,bs*mbs,bs*mbs,bs*mbs,bs*mbs);CHKERRQ(ierr); 1943 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 1944 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 1945 1946 /* ierr = PetscLogObjectParent(*B,iperm);CHKERRQ(ierr); */ 1947 b = (Mat_SeqSBAIJ*)(*B)->data; 1948 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 1949 b->singlemalloc = PETSC_FALSE; 1950 b->free_a = PETSC_TRUE; 1951 b->free_ij = PETSC_TRUE; 1952 /* the next line frees the default space generated by the Create() */ 1953 ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr); 1954 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 1955 b->j = ju; 1956 b->i = iu; 1957 b->diag = 0; 1958 b->ilen = 0; 1959 b->imax = 0; 1960 1961 if (b->row) { 1962 ierr = ISDestroy(b->row);CHKERRQ(ierr); 1963 } 1964 if (b->icol) { 1965 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 1966 } 1967 b->row = perm; 1968 b->icol = perm; 1969 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1970 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1971 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1972 /* In b structure: Free imax, ilen, old a, old j. 1973 Allocate idnew, solve_work, new a, new j */ 1974 ierr = PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1975 b->maxnz = b->nz = iu[mbs]; 1976 1977 (*B)->factor = FACTOR_CHOLESKY; 1978 (*B)->info.factor_mallocs = reallocs; 1979 (*B)->info.fill_ratio_given = f; 1980 if (ai[mbs] != 0) { 1981 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1982 } else { 1983 (*B)->info.fill_ratio_needed = 0.0; 1984 } 1985 1986 if (perm_identity){ 1987 switch (bs) { 1988 case 1: 1989 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1990 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1991 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1992 (*B)->ops->solves = MatSolves_SeqSBAIJ_1; 1993 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");CHKERRQ(ierr); 1994 break; 1995 case 2: 1996 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1997 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1998 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1999 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 2000 break; 2001 case 3: 2002 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 2003 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 2004 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 2005 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 2006 break; 2007 case 4: 2008 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 2009 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 2010 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 2011 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 2012 break; 2013 case 5: 2014 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 2015 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 2016 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 2017 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 2018 break; 2019 case 6: 2020 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 2021 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 2022 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 2023 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 2024 break; 2025 case 7: 2026 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 2027 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 2028 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 2029 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 2030 break; 2031 default: 2032 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 2033 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 2034 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 2035 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");CHKERRQ(ierr); 2036 break; 2037 } 2038 } 2039 2040 PetscFunctionReturn(0); 2041 } 2042 2043 #include "petscbt.h" 2044 #include "src/mat/utils/freespace.h" 2045 #undef __FUNCT__ 2046 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 2047 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 2048 { 2049 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2050 Mat_SeqSBAIJ *b; 2051 Mat B; 2052 PetscErrorCode ierr; 2053 PetscTruth perm_identity,free_ij = PETSC_TRUE; 2054 PetscInt bs=A->rmap.bs,am=a->mbs; 2055 PetscInt reallocs=0,*rip,i,*ai,*aj,*ui; 2056 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2057 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 2058 PetscReal fill=info->fill,levels=info->levels,ratio_needed; 2059 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2060 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2061 PetscBT lnkbt; 2062 PetscScalar *ua; 2063 2064 PetscFunctionBegin; 2065 /* 2066 This code originally uses Modified Sparse Row (MSR) storage 2067 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice! 2068 Then it is rewritten so the factor B takes seqsbaij format. However the associated 2069 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1, 2070 thus the original code in MSR format is still used for these cases. 2071 The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever 2072 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 2073 */ 2074 if (bs > 1){ 2075 ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);CHKERRQ(ierr); 2076 PetscFunctionReturn(0); 2077 } 2078 2079 /* check whether perm is the identity mapping */ 2080 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2081 2082 /* special case that simply copies fill pattern */ 2083 if (!levels && perm_identity) { 2084 a->permute = PETSC_FALSE; 2085 /* reuse the column pointers and row offsets for memory savings */ 2086 ui = a->i; 2087 uj = a->j; 2088 free_ij = PETSC_FALSE; 2089 ratio_needed = 1.0; 2090 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2091 if (perm_identity){ 2092 a->permute = PETSC_FALSE; 2093 ai = a->i; aj = a->j; 2094 } else { /* non-trivial permutation */ 2095 a->permute = PETSC_TRUE; 2096 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 2097 ai = a->inew; aj = a->jnew; 2098 } 2099 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2100 2101 /* initialization */ 2102 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2103 ui[0] = 0; 2104 2105 /* jl: linked list for storing indices of the pivot rows 2106 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2107 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 2108 il = jl + am; 2109 uj_ptr = (PetscInt**)(il + am); 2110 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 2111 for (i=0; i<am; i++){ 2112 jl[i] = am; il[i] = 0; 2113 } 2114 2115 /* create and initialize a linked list for storing column indices of the active row k */ 2116 nlnk = am + 1; 2117 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2118 2119 /* initial FreeSpace size is fill*(ai[am]+1) */ 2120 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2121 current_space = free_space; 2122 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2123 current_space_lvl = free_space_lvl; 2124 2125 for (k=0; k<am; k++){ /* for each active row k */ 2126 /* initialize lnk by the column indices of row rip[k] */ 2127 nzk = 0; 2128 ncols = ai[rip[k]+1] - ai[rip[k]]; 2129 cols = aj+ai[rip[k]]; 2130 ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2131 nzk += nlnk; 2132 2133 /* update lnk by computing fill-in for each pivot row to be merged in */ 2134 prow = jl[k]; /* 1st pivot row */ 2135 2136 while (prow < k){ 2137 nextprow = jl[prow]; 2138 2139 /* merge prow into k-th row */ 2140 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2141 jmax = ui[prow+1]; 2142 ncols = jmax-jmin; 2143 i = jmin - ui[prow]; 2144 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2145 j = *(uj_lvl_ptr[prow] + i - 1); 2146 cols_lvl = uj_lvl_ptr[prow]+i; 2147 ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2148 nzk += nlnk; 2149 2150 /* update il and jl for prow */ 2151 if (jmin < jmax){ 2152 il[prow] = jmin; 2153 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2154 } 2155 prow = nextprow; 2156 } 2157 2158 /* if free space is not available, make more free space */ 2159 if (current_space->local_remaining<nzk) { 2160 i = am - k + 1; /* num of unfactored rows */ 2161 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2162 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2163 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2164 reallocs++; 2165 } 2166 2167 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2168 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2169 2170 /* add the k-th row into il and jl */ 2171 if (nzk-1 > 0){ 2172 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2173 jl[k] = jl[i]; jl[i] = k; 2174 il[k] = ui[k] + 1; 2175 } 2176 uj_ptr[k] = current_space->array; 2177 uj_lvl_ptr[k] = current_space_lvl->array; 2178 2179 current_space->array += nzk; 2180 current_space->local_used += nzk; 2181 current_space->local_remaining -= nzk; 2182 current_space_lvl->array += nzk; 2183 current_space_lvl->local_used += nzk; 2184 current_space_lvl->local_remaining -= nzk; 2185 2186 ui[k+1] = ui[k] + nzk; 2187 } 2188 2189 #if defined(PETSC_USE_INFO) 2190 if (ai[am] != 0) { 2191 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2192 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2193 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2194 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2195 } else { 2196 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2197 } 2198 #endif 2199 2200 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2201 ierr = PetscFree(jl);CHKERRQ(ierr); 2202 2203 /* destroy list of free space and other temporary array(s) */ 2204 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2205 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2206 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2207 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2208 if (ai[am] != 0) { 2209 ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2210 } else { 2211 ratio_needed = 0.0; 2212 } 2213 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2214 2215 /* put together the new matrix in MATSEQSBAIJ format */ 2216 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&ua);CHKERRQ(ierr); 2217 ierr = MatCreateSeqSBAIJWithArrays(PETSC_COMM_SELF,1,am,am,ui,uj,ua,fact);CHKERRQ(ierr); 2218 B = *fact; 2219 b = (Mat_SeqSBAIJ*)B->data; 2220 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2221 b->singlemalloc = PETSC_FALSE; 2222 b->free_a = PETSC_TRUE; 2223 b->free_ij = free_ij; 2224 b->diag = 0; 2225 b->ilen = 0; 2226 b->imax = 0; 2227 b->row = perm; 2228 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2229 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2230 b->icol = perm; 2231 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2232 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2233 b->maxnz = b->nz = ui[am]; 2234 2235 B->factor = FACTOR_CHOLESKY; 2236 B->info.factor_mallocs = reallocs; 2237 B->info.fill_ratio_given = fill; 2238 B->info.fill_ratio_needed = ratio_needed; 2239 2240 if (perm_identity){ 2241 switch (bs) { 2242 case 1: 2243 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 2244 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2245 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2246 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");CHKERRQ(ierr); 2247 break; 2248 case 2: 2249 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 2250 B->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 2251 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 2252 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 2253 break; 2254 case 3: 2255 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 2256 B->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 2257 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 2258 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 2259 break; 2260 case 4: 2261 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 2262 B->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 2263 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 2264 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 2265 break; 2266 case 5: 2267 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 2268 B->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 2269 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 2270 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 2271 break; 2272 case 6: 2273 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 2274 B->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 2275 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 2276 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 2277 break; 2278 case 7: 2279 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 2280 B->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 2281 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 2282 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 2283 break; 2284 default: 2285 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 2286 B->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 2287 B->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 2288 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");CHKERRQ(ierr); 2289 break; 2290 } 2291 } 2292 PetscFunctionReturn(0); 2293 } 2294 2295