1 /* 2 Factorization code for SBAIJ format. 3 */ 4 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 #include "src/mat/impls/baij/seq/baij.h" 7 #include "src/inline/ilu.h" 8 #include "src/inline/dot.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_1_NaturalOrdering" 12 int MatSolveTranspose_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 13 { 14 PetscFunctionBegin; 15 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 16 } 17 18 #undef __FUNCT__ 19 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_2_NaturalOrdering" 20 int MatSolveTranspose_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 21 { 22 PetscFunctionBegin; 23 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 24 } 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_3_NaturalOrdering" 28 int MatSolveTranspose_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 29 { 30 PetscFunctionBegin; 31 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 32 } 33 34 #undef __FUNCT__ 35 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_4_NaturalOrdering" 36 int MatSolveTranspose_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 37 { 38 PetscFunctionBegin; 39 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 40 } 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_5_NaturalOrdering" 44 int MatSolveTranspose_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 45 { 46 PetscFunctionBegin; 47 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 48 } 49 50 #undef __FUNCT__ 51 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_6_NaturalOrdering" 52 int MatSolveTranspose_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 53 { 54 PetscFunctionBegin; 55 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_7_NaturalOrdering" 60 int MatSolveTranspose_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 61 { 62 PetscFunctionBegin; 63 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_1" 68 int MatSolveTranspose_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 69 { 70 PetscFunctionBegin; 71 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_2" 76 int MatSolveTranspose_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 77 { 78 PetscFunctionBegin; 79 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_3" 84 int MatSolveTranspose_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 85 { 86 PetscFunctionBegin; 87 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_4" 92 int MatSolveTranspose_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 93 { 94 PetscFunctionBegin; 95 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 96 } 97 98 #undef __FUNCT__ 99 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_5" 100 int MatSolveTranspose_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 101 { 102 PetscFunctionBegin; 103 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 104 } 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_6" 108 int MatSolveTranspose_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 109 { 110 PetscFunctionBegin; 111 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 112 } 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_7" 116 int MatSolveTranspose_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 117 { 118 PetscFunctionBegin; 119 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 120 } 121 122 #undef __FUNCT__ 123 #define __FUNCT__ "MatSolve_SeqSBAIJ_N" 124 int MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 125 { 126 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 127 IS isrow=a->row; 128 int mbs=a->mbs,*ai=a->i,*aj=a->j; 129 int nz,*vj,k,ierr,*r,idx,k1; 130 int bs=a->bs,bs2 = a->bs2; 131 MatScalar *aa=a->a,*v,*diag; 132 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 133 134 PetscFunctionBegin; 135 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 136 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 137 t = a->solve_work; 138 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 139 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 140 141 /* solve U^T * D * y = b by forward substitution */ 142 xk = t; 143 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 144 idx = bs*r[k]; 145 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 146 } 147 for (k=0; k<mbs; k++){ 148 v = aa + bs2*ai[k]; 149 xk = t + k*bs; /* Dk*xk = k-th block of x */ 150 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 151 nz = ai[k+1] - ai[k]; 152 vj = aj + ai[k]; 153 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 154 while (nz--) { 155 /* x(:) += U(k,:)^T*(Dk*xk) */ 156 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 157 vj++; xj = t + (*vj)*bs; 158 v += bs2; 159 } 160 /* xk = inv(Dk)*(Dk*xk) */ 161 diag = aa+k*bs2; /* ptr to inv(Dk) */ 162 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 163 } 164 165 /* solve U*x = y by back substitution */ 166 for (k=mbs-1; k>=0; k--){ 167 v = aa + bs2*ai[k]; 168 xk = t + k*bs; /* xk */ 169 nz = ai[k+1] - ai[k]; 170 vj = aj + ai[k]; 171 xj = t + (*vj)*bs; 172 while (nz--) { 173 /* xk += U(k,:)*x(:) */ 174 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 175 vj++; 176 v += bs2; xj = t + (*vj)*bs; 177 } 178 idx = bs*r[k]; 179 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 180 } 181 182 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 183 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 184 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 185 PetscLogFlops(bs2*(2*a->nz + mbs)); 186 PetscFunctionReturn(0); 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering" 191 int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 192 { 193 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 194 int mbs=a->mbs,*ai=a->i,*aj=a->j; 195 int nz,*vj,k,ierr; 196 int bs=a->bs,bs2 = a->bs2; 197 MatScalar *aa=a->a,*v,*diag; 198 PetscScalar *x,*xk,*xj,*b,*xk_tmp; 199 200 PetscFunctionBegin; 201 202 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 203 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 204 205 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 206 207 /* solve U^T * D * y = b by forward substitution */ 208 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 209 for (k=0; k<mbs; k++){ 210 v = aa + bs2*ai[k]; 211 xk = x + k*bs; /* Dk*xk = k-th block of x */ 212 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 213 nz = ai[k+1] - ai[k]; 214 vj = aj + ai[k]; 215 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 216 while (nz--) { 217 /* x(:) += U(k,:)^T*(Dk*xk) */ 218 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 219 vj++; xj = x + (*vj)*bs; 220 v += bs2; 221 } 222 /* xk = inv(Dk)*(Dk*xk) */ 223 diag = aa+k*bs2; /* ptr to inv(Dk) */ 224 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 225 } 226 227 /* solve U*x = y by back substitution */ 228 for (k=mbs-1; k>=0; k--){ 229 v = aa + bs2*ai[k]; 230 xk = x + k*bs; /* xk */ 231 nz = ai[k+1] - ai[k]; 232 vj = aj + ai[k]; 233 xj = x + (*vj)*bs; 234 while (nz--) { 235 /* xk += U(k,:)*x(:) */ 236 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 237 vj++; 238 v += bs2; xj = x + (*vj)*bs; 239 } 240 } 241 242 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 243 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 244 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 245 PetscLogFlops(bs2*(2*a->nz + mbs)); 246 PetscFunctionReturn(0); 247 } 248 249 #undef __FUNCT__ 250 #define __FUNCT__ "MatSolve_SeqSBAIJ_7" 251 int MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 252 { 253 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 254 IS isrow=a->row; 255 int mbs=a->mbs,*ai=a->i,*aj=a->j; 256 int nz,*vj,k,ierr,*r,idx; 257 MatScalar *aa=a->a,*v,*d; 258 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 259 260 PetscFunctionBegin; 261 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 262 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 263 t = a->solve_work; 264 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 265 266 /* solve U^T * D * y = b by forward substitution */ 267 tp = t; 268 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 269 idx = 7*r[k]; 270 tp[0] = b[idx]; 271 tp[1] = b[idx+1]; 272 tp[2] = b[idx+2]; 273 tp[3] = b[idx+3]; 274 tp[4] = b[idx+4]; 275 tp[5] = b[idx+5]; 276 tp[6] = b[idx+6]; 277 tp += 7; 278 } 279 280 for (k=0; k<mbs; k++){ 281 v = aa + 49*ai[k]; 282 vj = aj + ai[k]; 283 tp = t + k*7; 284 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 285 nz = ai[k+1] - ai[k]; 286 tp = t + (*vj)*7; 287 while (nz--) { 288 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 289 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 290 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 291 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 292 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 293 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 294 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 295 vj++; tp = t + (*vj)*7; 296 v += 49; 297 } 298 299 /* xk = inv(Dk)*(Dk*xk) */ 300 d = aa+k*49; /* ptr to inv(Dk) */ 301 tp = t + k*7; 302 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 303 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 304 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 305 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 306 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 307 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 308 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 309 } 310 311 /* solve U*x = y by back substitution */ 312 for (k=mbs-1; k>=0; k--){ 313 v = aa + 49*ai[k]; 314 vj = aj + ai[k]; 315 tp = t + k*7; 316 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 317 nz = ai[k+1] - ai[k]; 318 319 tp = t + (*vj)*7; 320 while (nz--) { 321 /* xk += U(k,:)*x(:) */ 322 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 323 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 324 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 325 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 326 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 327 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 328 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 329 vj++; tp = t + (*vj)*7; 330 v += 49; 331 } 332 tp = t + k*7; 333 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 334 idx = 7*r[k]; 335 x[idx] = x0; 336 x[idx+1] = x1; 337 x[idx+2] = x2; 338 x[idx+3] = x3; 339 x[idx+4] = x4; 340 x[idx+5] = x5; 341 x[idx+6] = x6; 342 } 343 344 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 345 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 346 PetscLogFlops(49*(2*a->nz + mbs)); 347 PetscFunctionReturn(0); 348 } 349 350 #undef __FUNCT__ 351 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering" 352 int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 353 { 354 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 355 int mbs=a->mbs,*ai=a->i,*aj=a->j; 356 MatScalar *aa=a->a,*v,*d; 357 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4,x5,x6; 358 int nz,*vj,k,ierr; 359 360 PetscFunctionBegin; 361 362 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 363 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 364 365 /* solve U^T * D * y = b by forward substitution */ 366 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 367 for (k=0; k<mbs; k++){ 368 v = aa + 49*ai[k]; 369 xp = x + k*7; 370 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 371 nz = ai[k+1] - ai[k]; 372 vj = aj + ai[k]; 373 xp = x + (*vj)*7; 374 while (nz--) { 375 /* x(:) += U(k,:)^T*(Dk*xk) */ 376 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 377 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 378 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 379 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 380 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 381 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 382 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 383 vj++; xp = x + (*vj)*7; 384 v += 49; 385 } 386 /* xk = inv(Dk)*(Dk*xk) */ 387 d = aa+k*49; /* ptr to inv(Dk) */ 388 xp = x + k*7; 389 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 390 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 391 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 392 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 393 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 394 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 395 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 396 } 397 398 /* solve U*x = y by back substitution */ 399 for (k=mbs-1; k>=0; k--){ 400 v = aa + 49*ai[k]; 401 xp = x + k*7; 402 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 403 nz = ai[k+1] - ai[k]; 404 vj = aj + ai[k]; 405 xp = x + (*vj)*7; 406 while (nz--) { 407 /* xk += U(k,:)*x(:) */ 408 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 409 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 410 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 411 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 412 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 413 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 414 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 415 vj++; 416 v += 49; xp = x + (*vj)*7; 417 } 418 xp = x + k*7; 419 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 420 } 421 422 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 423 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 424 PetscLogFlops(49*(2*a->nz + mbs)); 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatSolve_SeqSBAIJ_6" 430 int MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 431 { 432 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 433 IS isrow=a->row; 434 int mbs=a->mbs,*ai=a->i,*aj=a->j; 435 int nz,*vj,k,ierr,*r,idx; 436 MatScalar *aa=a->a,*v,*d; 437 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 438 439 PetscFunctionBegin; 440 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 441 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 442 t = a->solve_work; 443 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 444 445 /* solve U^T * D * y = b by forward substitution */ 446 tp = t; 447 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 448 idx = 6*r[k]; 449 tp[0] = b[idx]; 450 tp[1] = b[idx+1]; 451 tp[2] = b[idx+2]; 452 tp[3] = b[idx+3]; 453 tp[4] = b[idx+4]; 454 tp[5] = b[idx+5]; 455 tp += 6; 456 } 457 458 for (k=0; k<mbs; k++){ 459 v = aa + 36*ai[k]; 460 vj = aj + ai[k]; 461 tp = t + k*6; 462 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 463 nz = ai[k+1] - ai[k]; 464 tp = t + (*vj)*6; 465 while (nz--) { 466 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 467 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 468 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 469 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 470 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 471 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 472 vj++; tp = t + (*vj)*6; 473 v += 36; 474 } 475 476 /* xk = inv(Dk)*(Dk*xk) */ 477 d = aa+k*36; /* ptr to inv(Dk) */ 478 tp = t + k*6; 479 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 480 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 481 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 482 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 483 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 484 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 485 } 486 487 /* solve U*x = y by back substitution */ 488 for (k=mbs-1; k>=0; k--){ 489 v = aa + 36*ai[k]; 490 vj = aj + ai[k]; 491 tp = t + k*6; 492 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 493 nz = ai[k+1] - ai[k]; 494 495 tp = t + (*vj)*6; 496 while (nz--) { 497 /* xk += U(k,:)*x(:) */ 498 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 499 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 500 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 501 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 502 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 503 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 504 vj++; tp = t + (*vj)*6; 505 v += 36; 506 } 507 tp = t + k*6; 508 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 509 idx = 6*r[k]; 510 x[idx] = x0; 511 x[idx+1] = x1; 512 x[idx+2] = x2; 513 x[idx+3] = x3; 514 x[idx+4] = x4; 515 x[idx+5] = x5; 516 } 517 518 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 519 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 520 PetscLogFlops(36*(2*a->nz + mbs)); 521 PetscFunctionReturn(0); 522 } 523 524 #undef __FUNCT__ 525 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering" 526 int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 527 { 528 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 529 int mbs=a->mbs,*ai=a->i,*aj=a->j; 530 MatScalar *aa=a->a,*v,*d; 531 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4,x5; 532 int nz,*vj,k,ierr; 533 534 PetscFunctionBegin; 535 536 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 537 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 538 539 /* solve U^T * D * y = b by forward substitution */ 540 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 541 for (k=0; k<mbs; k++){ 542 v = aa + 36*ai[k]; 543 xp = x + k*6; 544 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 545 nz = ai[k+1] - ai[k]; 546 vj = aj + ai[k]; 547 xp = x + (*vj)*6; 548 while (nz--) { 549 /* x(:) += U(k,:)^T*(Dk*xk) */ 550 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 551 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 552 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 553 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 554 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 555 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 556 vj++; xp = x + (*vj)*6; 557 v += 36; 558 } 559 /* xk = inv(Dk)*(Dk*xk) */ 560 d = aa+k*36; /* ptr to inv(Dk) */ 561 xp = x + k*6; 562 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 563 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 564 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 565 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 566 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 567 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 568 } 569 570 /* solve U*x = y by back substitution */ 571 for (k=mbs-1; k>=0; k--){ 572 v = aa + 36*ai[k]; 573 xp = x + k*6; 574 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 575 nz = ai[k+1] - ai[k]; 576 vj = aj + ai[k]; 577 xp = x + (*vj)*6; 578 while (nz--) { 579 /* xk += U(k,:)*x(:) */ 580 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 581 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 582 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 583 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 584 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 585 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 586 vj++; 587 v += 36; xp = x + (*vj)*6; 588 } 589 xp = x + k*6; 590 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 591 } 592 593 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 594 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 595 PetscLogFlops(36*(2*a->nz + mbs)); 596 PetscFunctionReturn(0); 597 } 598 599 #undef __FUNCT__ 600 #define __FUNCT__ "MatSolve_SeqSBAIJ_5" 601 int MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 602 { 603 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 604 IS isrow=a->row; 605 int mbs=a->mbs,*ai=a->i,*aj=a->j; 606 int nz,*vj,k,ierr,*r,idx; 607 MatScalar *aa=a->a,*v,*diag; 608 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 609 610 PetscFunctionBegin; 611 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 612 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 613 t = a->solve_work; 614 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 615 616 /* solve U^T * D * y = b by forward substitution */ 617 tp = t; 618 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 619 idx = 5*r[k]; 620 tp[0] = b[idx]; 621 tp[1] = b[idx+1]; 622 tp[2] = b[idx+2]; 623 tp[3] = b[idx+3]; 624 tp[4] = b[idx+4]; 625 tp += 5; 626 } 627 628 for (k=0; k<mbs; k++){ 629 v = aa + 25*ai[k]; 630 vj = aj + ai[k]; 631 tp = t + k*5; 632 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 633 nz = ai[k+1] - ai[k]; 634 635 tp = t + (*vj)*5; 636 while (nz--) { 637 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 638 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 639 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 640 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 641 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 642 vj++; tp = t + (*vj)*5; 643 v += 25; 644 } 645 646 /* xk = inv(Dk)*(Dk*xk) */ 647 diag = aa+k*25; /* ptr to inv(Dk) */ 648 tp = t + k*5; 649 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 650 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 651 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 652 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 653 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 654 } 655 656 /* solve U*x = y by back substitution */ 657 for (k=mbs-1; k>=0; k--){ 658 v = aa + 25*ai[k]; 659 vj = aj + ai[k]; 660 tp = t + k*5; 661 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 662 nz = ai[k+1] - ai[k]; 663 664 tp = t + (*vj)*5; 665 while (nz--) { 666 /* xk += U(k,:)*x(:) */ 667 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 668 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 669 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 670 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 671 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 672 vj++; tp = t + (*vj)*5; 673 v += 25; 674 } 675 tp = t + k*5; 676 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 677 idx = 5*r[k]; 678 x[idx] = x0; 679 x[idx+1] = x1; 680 x[idx+2] = x2; 681 x[idx+3] = x3; 682 x[idx+4] = x4; 683 } 684 685 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 686 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 687 PetscLogFlops(25*(2*a->nz + mbs)); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering" 693 int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 694 { 695 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 696 int mbs=a->mbs,*ai=a->i,*aj=a->j; 697 MatScalar *aa=a->a,*v,*diag; 698 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4; 699 int nz,*vj,k,ierr; 700 701 PetscFunctionBegin; 702 703 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 704 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 705 706 /* solve U^T * D * y = b by forward substitution */ 707 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 708 for (k=0; k<mbs; k++){ 709 v = aa + 25*ai[k]; 710 xp = x + k*5; 711 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 712 nz = ai[k+1] - ai[k]; 713 vj = aj + ai[k]; 714 xp = x + (*vj)*5; 715 while (nz--) { 716 /* x(:) += U(k,:)^T*(Dk*xk) */ 717 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 718 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 719 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 720 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 721 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 722 vj++; xp = x + (*vj)*5; 723 v += 25; 724 } 725 /* xk = inv(Dk)*(Dk*xk) */ 726 diag = aa+k*25; /* ptr to inv(Dk) */ 727 xp = x + k*5; 728 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 729 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 730 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 731 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 732 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 733 } 734 735 /* solve U*x = y by back substitution */ 736 for (k=mbs-1; k>=0; k--){ 737 v = aa + 25*ai[k]; 738 xp = x + k*5; 739 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 740 nz = ai[k+1] - ai[k]; 741 vj = aj + ai[k]; 742 xp = x + (*vj)*5; 743 while (nz--) { 744 /* xk += U(k,:)*x(:) */ 745 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 746 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 747 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 748 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 749 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 750 vj++; 751 v += 25; xp = x + (*vj)*5; 752 } 753 xp = x + k*5; 754 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 755 } 756 757 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 758 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 759 PetscLogFlops(25*(2*a->nz + mbs)); 760 PetscFunctionReturn(0); 761 } 762 763 #undef __FUNCT__ 764 #define __FUNCT__ "MatSolve_SeqSBAIJ_4" 765 int MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 766 { 767 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 768 IS isrow=a->row; 769 int mbs=a->mbs,*ai=a->i,*aj=a->j; 770 int nz,*vj,k,ierr,*r,idx; 771 MatScalar *aa=a->a,*v,*diag; 772 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 773 774 PetscFunctionBegin; 775 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 776 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 777 t = a->solve_work; 778 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 779 780 /* solve U^T * D * y = b by forward substitution */ 781 tp = t; 782 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 783 idx = 4*r[k]; 784 tp[0] = b[idx]; 785 tp[1] = b[idx+1]; 786 tp[2] = b[idx+2]; 787 tp[3] = b[idx+3]; 788 tp += 4; 789 } 790 791 for (k=0; k<mbs; k++){ 792 v = aa + 16*ai[k]; 793 vj = aj + ai[k]; 794 tp = t + k*4; 795 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 796 nz = ai[k+1] - ai[k]; 797 798 tp = t + (*vj)*4; 799 while (nz--) { 800 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 801 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 802 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 803 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 804 vj++; tp = t + (*vj)*4; 805 v += 16; 806 } 807 808 /* xk = inv(Dk)*(Dk*xk) */ 809 diag = aa+k*16; /* ptr to inv(Dk) */ 810 tp = t + k*4; 811 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 812 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 813 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 814 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 815 } 816 817 /* solve U*x = y by back substitution */ 818 for (k=mbs-1; k>=0; k--){ 819 v = aa + 16*ai[k]; 820 vj = aj + ai[k]; 821 tp = t + k*4; 822 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 823 nz = ai[k+1] - ai[k]; 824 825 tp = t + (*vj)*4; 826 while (nz--) { 827 /* xk += U(k,:)*x(:) */ 828 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 829 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 830 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 831 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 832 vj++; tp = t + (*vj)*4; 833 v += 16; 834 } 835 tp = t + k*4; 836 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 837 idx = 4*r[k]; 838 x[idx] = x0; 839 x[idx+1] = x1; 840 x[idx+2] = x2; 841 x[idx+3] = x3; 842 } 843 844 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 845 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 846 PetscLogFlops(16*(2*a->nz + mbs)); 847 PetscFunctionReturn(0); 848 } 849 850 /* 851 Special case where the matrix was factored in the natural ordering. 852 This eliminates the need for the column and row permutation. 853 */ 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering" 856 int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 857 { 858 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 859 int mbs=a->mbs,*ai=a->i,*aj=a->j; 860 MatScalar *aa=a->a,*v,*diag; 861 PetscScalar *x,*xp,*b,x0,x1,x2,x3; 862 int nz,*vj,k,ierr; 863 864 PetscFunctionBegin; 865 866 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 867 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 868 869 /* solve U^T * D * y = b by forward substitution */ 870 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 871 for (k=0; k<mbs; k++){ 872 v = aa + 16*ai[k]; 873 xp = x + k*4; 874 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 875 nz = ai[k+1] - ai[k]; 876 vj = aj + ai[k]; 877 xp = x + (*vj)*4; 878 while (nz--) { 879 /* x(:) += U(k,:)^T*(Dk*xk) */ 880 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 881 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 882 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 883 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 884 vj++; xp = x + (*vj)*4; 885 v += 16; 886 } 887 /* xk = inv(Dk)*(Dk*xk) */ 888 diag = aa+k*16; /* ptr to inv(Dk) */ 889 xp = x + k*4; 890 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 891 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 892 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 893 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 894 } 895 896 /* solve U*x = y by back substitution */ 897 for (k=mbs-1; k>=0; k--){ 898 v = aa + 16*ai[k]; 899 xp = x + k*4; 900 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 901 nz = ai[k+1] - ai[k]; 902 vj = aj + ai[k]; 903 xp = x + (*vj)*4; 904 while (nz--) { 905 /* xk += U(k,:)*x(:) */ 906 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 907 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 908 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 909 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 910 vj++; 911 v += 16; xp = x + (*vj)*4; 912 } 913 xp = x + k*4; 914 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 915 } 916 917 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 918 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 919 PetscLogFlops(16*(2*a->nz + mbs)); 920 PetscFunctionReturn(0); 921 } 922 923 #undef __FUNCT__ 924 #define __FUNCT__ "MatSolve_SeqSBAIJ_3" 925 int MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 926 { 927 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 928 IS isrow=a->row; 929 int mbs=a->mbs,*ai=a->i,*aj=a->j; 930 int nz,*vj,k,ierr,*r,idx; 931 MatScalar *aa=a->a,*v,*diag; 932 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 933 934 PetscFunctionBegin; 935 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 936 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 937 t = a->solve_work; 938 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 939 940 /* solve U^T * D * y = b by forward substitution */ 941 tp = t; 942 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 943 idx = 3*r[k]; 944 tp[0] = b[idx]; 945 tp[1] = b[idx+1]; 946 tp[2] = b[idx+2]; 947 tp += 3; 948 } 949 950 for (k=0; k<mbs; k++){ 951 v = aa + 9*ai[k]; 952 vj = aj + ai[k]; 953 tp = t + k*3; 954 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 955 nz = ai[k+1] - ai[k]; 956 957 tp = t + (*vj)*3; 958 while (nz--) { 959 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 960 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 961 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 962 vj++; tp = t + (*vj)*3; 963 v += 9; 964 } 965 966 /* xk = inv(Dk)*(Dk*xk) */ 967 diag = aa+k*9; /* ptr to inv(Dk) */ 968 tp = t + k*3; 969 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 970 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 971 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 972 } 973 974 /* solve U*x = y by back substitution */ 975 for (k=mbs-1; k>=0; k--){ 976 v = aa + 9*ai[k]; 977 vj = aj + ai[k]; 978 tp = t + k*3; 979 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 980 nz = ai[k+1] - ai[k]; 981 982 tp = t + (*vj)*3; 983 while (nz--) { 984 /* xk += U(k,:)*x(:) */ 985 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 986 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 987 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 988 vj++; tp = t + (*vj)*3; 989 v += 9; 990 } 991 tp = t + k*3; 992 tp[0] = x0; tp[1] = x1; tp[2] = x2; 993 idx = 3*r[k]; 994 x[idx] = x0; 995 x[idx+1] = x1; 996 x[idx+2] = x2; 997 } 998 999 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1000 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1001 PetscLogFlops(9*(2*a->nz + mbs)); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 /* 1006 Special case where the matrix was factored in the natural ordering. 1007 This eliminates the need for the column and row permutation. 1008 */ 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering" 1011 int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1012 { 1013 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1014 int mbs=a->mbs,*ai=a->i,*aj=a->j; 1015 MatScalar *aa=a->a,*v,*diag; 1016 PetscScalar *x,*xp,*b,x0,x1,x2; 1017 int nz,*vj,k,ierr; 1018 1019 PetscFunctionBegin; 1020 1021 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1022 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1023 1024 /* solve U^T * D * y = b by forward substitution */ 1025 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1026 for (k=0; k<mbs; k++){ 1027 v = aa + 9*ai[k]; 1028 xp = x + k*3; 1029 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 1030 nz = ai[k+1] - ai[k]; 1031 vj = aj + ai[k]; 1032 xp = x + (*vj)*3; 1033 while (nz--) { 1034 /* x(:) += U(k,:)^T*(Dk*xk) */ 1035 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1036 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1037 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1038 vj++; xp = x + (*vj)*3; 1039 v += 9; 1040 } 1041 /* xk = inv(Dk)*(Dk*xk) */ 1042 diag = aa+k*9; /* ptr to inv(Dk) */ 1043 xp = x + k*3; 1044 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1045 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1046 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1047 } 1048 1049 /* solve U*x = y by back substitution */ 1050 for (k=mbs-1; k>=0; k--){ 1051 v = aa + 9*ai[k]; 1052 xp = x + k*3; 1053 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 1054 nz = ai[k+1] - ai[k]; 1055 vj = aj + ai[k]; 1056 xp = x + (*vj)*3; 1057 while (nz--) { 1058 /* xk += U(k,:)*x(:) */ 1059 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 1060 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 1061 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 1062 vj++; 1063 v += 9; xp = x + (*vj)*3; 1064 } 1065 xp = x + k*3; 1066 xp[0] = x0; xp[1] = x1; xp[2] = x2; 1067 } 1068 1069 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1070 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1071 PetscLogFlops(9*(2*a->nz + mbs)); 1072 PetscFunctionReturn(0); 1073 } 1074 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatSolve_SeqSBAIJ_2" 1077 int MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 1078 { 1079 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 1080 IS isrow=a->row; 1081 int mbs=a->mbs,*ai=a->i,*aj=a->j; 1082 int nz,*vj,k,k2,ierr,*r,idx; 1083 MatScalar *aa=a->a,*v,*diag; 1084 PetscScalar *x,*b,x0,x1,*t; 1085 1086 PetscFunctionBegin; 1087 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1088 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1089 t = a->solve_work; 1090 /* printf("called MatSolve_SeqSBAIJ_2\n"); */ 1091 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1092 1093 /* solve U^T * D * y = perm(b) by forward substitution */ 1094 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1095 idx = 2*r[k]; 1096 t[k*2] = b[idx]; 1097 t[k*2+1] = b[idx+1]; 1098 } 1099 for (k=0; k<mbs; k++){ 1100 v = aa + 4*ai[k]; 1101 vj = aj + ai[k]; 1102 k2 = k*2; 1103 x0 = t[k2]; x1 = t[k2+1]; 1104 nz = ai[k+1] - ai[k]; 1105 while (nz--) { 1106 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1107 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1108 vj++; v += 4; 1109 } 1110 diag = aa+k*4; /* ptr to inv(Dk) */ 1111 t[k2] = diag[0]*x0 + diag[2]*x1; 1112 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1113 } 1114 1115 /* solve U*x = y by back substitution */ 1116 for (k=mbs-1; k>=0; k--){ 1117 v = aa + 4*ai[k]; 1118 vj = aj + ai[k]; 1119 k2 = k*2; 1120 x0 = t[k2]; x1 = t[k2+1]; 1121 nz = ai[k+1] - ai[k]; 1122 while (nz--) { 1123 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1124 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1125 vj++; v += 4; 1126 } 1127 t[k2] = x0; 1128 t[k2+1] = x1; 1129 idx = 2*r[k]; 1130 x[idx] = x0; 1131 x[idx+1] = x1; 1132 } 1133 1134 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1135 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1136 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1137 PetscLogFlops(4*(2*a->nz + mbs)); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 /* 1142 Special case where the matrix was factored in the natural ordering. 1143 This eliminates the need for the column and row permutation. 1144 */ 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering" 1147 int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1148 { 1149 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1150 int mbs=a->mbs,*ai=a->i,*aj=a->j; 1151 MatScalar *aa=a->a,*v,*diag; 1152 PetscScalar *x,*b,x0,x1; 1153 int nz,*vj,k,k2,ierr; 1154 1155 PetscFunctionBegin; 1156 1157 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1158 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1159 1160 /* solve U^T * D * y = b by forward substitution */ 1161 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1162 for (k=0; k<mbs; k++){ 1163 v = aa + 4*ai[k]; 1164 vj = aj + ai[k]; 1165 k2 = k*2; 1166 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1167 nz = ai[k+1] - ai[k]; 1168 1169 while (nz--) { 1170 /* x(:) += U(k,:)^T*(Dk*xk) */ 1171 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1172 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1173 vj++; v += 4; 1174 } 1175 /* xk = inv(Dk)*(Dk*xk) */ 1176 diag = aa+k*4; /* ptr to inv(Dk) */ 1177 x[k2] = diag[0]*x0 + diag[2]*x1; 1178 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1179 } 1180 1181 /* solve U*x = y by back substitution */ 1182 for (k=mbs-1; k>=0; k--){ 1183 v = aa + 4*ai[k]; 1184 vj = aj + ai[k]; 1185 k2 = k*2; 1186 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1187 nz = ai[k+1] - ai[k]; 1188 while (nz--) { 1189 /* xk += U(k,:)*x(:) */ 1190 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1191 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1192 vj++; v += 4; 1193 } 1194 x[k2] = x0; 1195 x[k2+1] = x1; 1196 } 1197 1198 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1199 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1200 PetscLogFlops(4*(2*a->nz + mbs)); /* bs2*(2*a->nz + mbs) */ 1201 PetscFunctionReturn(0); 1202 } 1203 1204 #undef __FUNCT__ 1205 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1206 int MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1207 { 1208 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1209 IS isrow=a->row; 1210 int mbs=a->mbs,*ai=a->i,*aj=a->j,ierr,*rip; 1211 MatScalar *aa=a->a,*v; 1212 PetscScalar *x,*b,xk,*t; 1213 int nz,*vj,k; 1214 1215 PetscFunctionBegin; 1216 if (!mbs) PetscFunctionReturn(0); 1217 1218 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1219 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1220 t = a->solve_work; 1221 1222 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1223 1224 /* solve U^T*D*y = perm(b) by forward substitution */ 1225 for (k=0; k<mbs; k++) t[k] = b[rip[k]]; 1226 for (k=0; k<mbs; k++){ 1227 v = aa + ai[k]; 1228 vj = aj + ai[k]; 1229 xk = t[k]; 1230 nz = ai[k+1] - ai[k]; 1231 while (nz--) t[*vj++] += (*v++) * xk; 1232 t[k] = xk*aa[k]; /* note: aa[k] = 1/D(k) */ 1233 } 1234 1235 /* solve U*x = y by back substitution */ 1236 for (k=mbs-1; k>=0; k--){ 1237 v = aa + ai[k]; 1238 vj = aj + ai[k]; 1239 xk = t[k]; 1240 nz = ai[k+1] - ai[k]; 1241 while (nz--) xk += (*v++) * t[*vj++]; 1242 t[k] = xk; 1243 x[rip[k]] = xk; 1244 } 1245 1246 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1247 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1248 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1249 PetscLogFlops(4*a->nz + A->m); 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1255 int MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1256 { 1257 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1258 int ierr; 1259 1260 PetscFunctionBegin; 1261 if (a->bs == 1) { 1262 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1263 } else { 1264 IS isrow=a->row; 1265 int mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i; 1266 MatScalar *aa=a->a,*v; 1267 PetscScalar *x,*b,*t; 1268 int nz,*vj,k,n; 1269 if (bb->n > a->solves_work_n) { 1270 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 1271 ierr = PetscMalloc(bb->n*A->m*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1272 a->solves_work_n = bb->n; 1273 } 1274 n = bb->n; 1275 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1276 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1277 t = a->solves_work; 1278 1279 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1280 1281 /* solve U^T*D*y = perm(b) by forward substitution */ 1282 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */ 1283 for (k=0; k<mbs; k++){ 1284 v = aa + ai[k]; 1285 vj = aj + ai[k]; 1286 nz = ai[k+1] - ai[k]; 1287 while (nz--) { 1288 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1289 v++;vj++; 1290 } 1291 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1292 } 1293 1294 /* solve U*x = y by back substitution */ 1295 for (k=mbs-1; k>=0; k--){ 1296 v = aa + ai[k]; 1297 vj = aj + ai[k]; 1298 nz = ai[k+1] - ai[k]; 1299 while (nz--) { 1300 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1301 v++;vj++; 1302 } 1303 for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i]; 1304 } 1305 1306 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1307 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1308 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1309 PetscLogFlops(bb->n*(4*a->nz + A->m)); 1310 } 1311 PetscFunctionReturn(0); 1312 } 1313 1314 /* 1315 Special case where the matrix was ILU(0) factored in the natural 1316 ordering. This eliminates the need for the column and row permutation. 1317 */ 1318 #undef __FUNCT__ 1319 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1320 int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1321 { 1322 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1323 int mbs=a->mbs,*ai=a->i,*aj=a->j,ierr; 1324 MatScalar *aa=a->a,*v; 1325 PetscScalar *x,*b,xk; 1326 int nz,*vj,k; 1327 1328 PetscFunctionBegin; 1329 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1330 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1331 1332 /* solve U^T*D*y = b by forward substitution */ 1333 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1334 for (k=0; k<mbs; k++){ 1335 v = aa + ai[k] + 1; 1336 vj = aj + ai[k] + 1; 1337 xk = x[k]; 1338 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1339 while (nz--) x[*vj++] += (*v++) * xk; 1340 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1341 } 1342 1343 /* solve U*x = y by back substitution */ 1344 for (k=mbs-2; k>=0; k--){ 1345 v = aa + ai[k] + 1; 1346 vj = aj + ai[k] + 1; 1347 xk = x[k]; 1348 nz = ai[k+1] - ai[k] - 1; 1349 while (nz--) xk += (*v++) * x[*vj++]; 1350 x[k] = xk; 1351 } 1352 1353 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1354 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1355 PetscLogFlops(4*a->nz + A->m); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 1360 #undef __FUNCT__ 1361 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 1362 int MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B) 1363 { 1364 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 1365 int *rip,ierr,i,mbs = a->mbs,*ai = a->i,*aj = a->j; 1366 int *jutmp,bs = a->bs,bs2=a->bs2; 1367 int m,realloc = 0,*levtmp; 1368 int *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd,*jl; 1369 int incrlev,*lev,shift,prow,nz; 1370 int *il,ili,nextprow; 1371 PetscReal f = info->fill,levels = info->levels; 1372 PetscTruth perm_identity; 1373 1374 PetscFunctionBegin; 1375 /* check whether perm is the identity mapping */ 1376 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1377 1378 /* special case that simply copies fill pattern */ 1379 if (!levels && perm_identity && bs==1) { 1380 ierr = MatDuplicate_SeqSBAIJ(A,MAT_DO_NOT_COPY_VALUES,B);CHKERRQ(ierr); 1381 (*B)->factor = FACTOR_CHOLESKY; 1382 b = (Mat_SeqSBAIJ*)(*B)->data; 1383 b->row = perm; 1384 b->icol = perm; 1385 b->factor_damping = info->damping; 1386 b->factor_shift = info->shift; 1387 b->factor_zeropivot = info->zeropivot; 1388 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1389 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1390 ierr = PetscMalloc(((*B)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1391 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1392 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1393 PetscFunctionReturn(0); 1394 } 1395 1396 /* --- inplace icc(levels), levels>0, ie, *B has same data structure as A --- */ 1397 if (levels > 0 && perm_identity && bs==1 ){ 1398 if (!perm_identity) a->permute = PETSC_TRUE; 1399 1400 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1401 1402 if (perm_identity){ /* without permutation */ 1403 ai = a->i; aj = a->j; 1404 } else { /* non-trivial permutation */ 1405 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 1406 ai = a->inew; aj = a->jnew; 1407 } 1408 1409 /* initialization */ 1410 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 1411 umax = (int)(f*ai[mbs] + 1); 1412 ierr = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr); 1413 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 1414 iu[0] = 0; 1415 juidx = 0; /* index for ju */ 1416 ierr = PetscMalloc((4*mbs+1)*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */ 1417 q = jl + mbs; /* linked list for col index of active row */ 1418 levtmp = q + mbs; 1419 il = levtmp + mbs; 1420 for (i=0; i<mbs; i++){ 1421 jl[i] = mbs; 1422 q[i] = 0; 1423 il[i] = 0; 1424 } 1425 1426 /* for each row k */ 1427 for (k=0; k<mbs; k++){ 1428 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 1429 q[k] = mbs; 1430 /* initialize nonzero structure of k-th row to row rip[k] of A */ 1431 jmin = ai[rip[k]] +1; /* exclude diag[k] */ 1432 jmax = ai[rip[k]+1]; 1433 for (j=jmin; j<jmax; j++){ 1434 vj = rip[aj[j]]; /* col. value */ 1435 if(vj > k){ 1436 qm = k; 1437 do { 1438 m = qm; qm = q[m]; 1439 } while(qm < vj); 1440 if (qm == vj) { 1441 SETERRQ(1," error: duplicate entry in A\n"); 1442 } 1443 nzk++; 1444 q[m] = vj; 1445 q[vj] = qm; 1446 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1447 } /* if(vj > k) */ 1448 } /* for (j=jmin; j<jmax; j++) */ 1449 1450 /* modify nonzero structure of k-th row by computing fill-in 1451 for each row i to be merged in */ 1452 prow = k; 1453 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 1454 1455 while (prow < k){ 1456 nextprow = jl[prow]; 1457 /* merge row prow into k-th row */ 1458 ili = il[prow]; 1459 jmin = ili + 1; /* points to 2nd nzero entry in U(prow,k:mbs-1) */ 1460 jmax = iu[prow+1]; 1461 qm = k; 1462 for (j=jmin; j<jmax; j++){ 1463 vj = ju[j]; 1464 incrlev = lev[j] + 1; 1465 if (incrlev > levels) continue; 1466 do { 1467 m = qm; qm = q[m]; 1468 } while (qm < vj); 1469 if (qm != vj){ /* a fill */ 1470 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1471 levtmp[vj] = incrlev; 1472 } else { 1473 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1474 } 1475 } 1476 if (jmin < jmax){ /* update il and jl */ 1477 il[prow] = jmin; 1478 j = ju[jmin]; 1479 jl[prow] = jl[j]; jl[j] = prow; 1480 } 1481 prow = nextprow; 1482 } 1483 1484 /* add the first nonzero element in U(k, k+1:mbs-1) to jl */ 1485 if (nzk > 0){ 1486 i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1487 jl[k] = jl[i]; jl[i] = k; 1488 il[k] = iu[k] + 1; 1489 } 1490 iu[k+1] = iu[k] + nzk + 1; /* include diag[k] */ 1491 1492 /* allocate more space to ju if needed */ 1493 if (iu[k+1] > umax) { 1494 /* estimate how much additional space we will need */ 1495 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1496 /* just double the memory each time */ 1497 maxadd = umax; 1498 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 1499 umax += maxadd; 1500 1501 /* allocate a longer ju */ 1502 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1503 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 1504 ierr = PetscFree(ju);CHKERRQ(ierr); 1505 ju = jutmp; 1506 1507 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1508 ierr = PetscMemcpy(jutmp,lev,(iu[k])*sizeof(int));CHKERRQ(ierr); 1509 ierr = PetscFree(lev);CHKERRQ(ierr); 1510 lev = jutmp; 1511 realloc += 2; /* count how many times we realloc */ 1512 } 1513 1514 /* save nonzero structure of k-th row in ju */ 1515 ju[juidx] = k; /* diag[k] */ 1516 lev[juidx] = 0; 1517 juidx++; 1518 i = k; 1519 while (nzk --) { 1520 i = q[i]; 1521 ju[juidx] = i; 1522 lev[juidx] = levtmp[i]; 1523 juidx++; 1524 } 1525 } /* end of for (k=0; k<mbs; k++) */ 1526 1527 if (ai[mbs] != 0) { 1528 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1529 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1530 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1531 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 1532 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 1533 } else { 1534 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 1535 } 1536 1537 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1538 ierr = PetscFree(jl);CHKERRQ(ierr); 1539 ierr = PetscFree(lev);CHKERRQ(ierr); 1540 1541 /* put together the new matrix */ 1542 ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr); 1543 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 1544 ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 1545 1546 /* PetscLogObjectParent(*B,iperm); */ 1547 b = (Mat_SeqSBAIJ*)(*B)->data; 1548 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1549 b->singlemalloc = PETSC_FALSE; 1550 /* the next line frees the default space generated by the Create() */ 1551 ierr = PetscFree(b->a);CHKERRQ(ierr); 1552 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1553 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 1554 b->j = ju; 1555 b->i = iu; 1556 b->diag = 0; 1557 b->ilen = 0; 1558 b->imax = 0; 1559 b->row = perm; 1560 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1561 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1562 b->icol = perm; 1563 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1564 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1565 /* In b structure: Free imax, ilen, old a, old j. 1566 Allocate idnew, solve_work, new a, new j */ 1567 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 1568 b->maxnz = b->nz = iu[mbs]; 1569 b->factor_damping = info->damping; 1570 b->factor_shift = info->shift; 1571 b->factor_zeropivot = info->zeropivot; 1572 1573 (*B)->factor = FACTOR_CHOLESKY; 1574 (*B)->info.factor_mallocs = realloc; 1575 (*B)->info.fill_ratio_given = f; 1576 if (ai[mbs] != 0) { 1577 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1578 } else { 1579 (*B)->info.fill_ratio_needed = 0.0; 1580 } 1581 1582 1583 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1584 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1585 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 1586 1587 PetscFunctionReturn(0); 1588 } /* end of if (levels > 0 && perm_identity && bs==1 ) */ 1589 1590 if (!perm_identity) a->permute = PETSC_TRUE; 1591 if (perm_identity){ 1592 ai = a->i; aj = a->j; 1593 } else { /* non-trivial permutation */ 1594 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 1595 ai = a->inew; aj = a->jnew; 1596 } 1597 1598 /* initialization */ 1599 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1600 umax = (int)(f*ai[mbs] + 1); 1601 ierr = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr); 1602 umax += mbs + 1; 1603 shift = mbs + 1; 1604 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 1605 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 1606 iu[0] = mbs + 1; 1607 juidx = mbs + 1; 1608 /* prowl: linked list for pivot row */ 1609 ierr = PetscMalloc((3*mbs+1)*sizeof(int),&prowl);CHKERRQ(ierr); 1610 /* q: linked list for col index */ 1611 q = prowl + mbs; 1612 levtmp = q + mbs; 1613 1614 for (i=0; i<mbs; i++){ 1615 prowl[i] = mbs; 1616 q[i] = 0; 1617 } 1618 1619 /* for each row k */ 1620 for (k=0; k<mbs; k++){ 1621 nzk = 0; 1622 q[k] = mbs; 1623 /* copy current row into linked list */ 1624 nz = ai[rip[k]+1] - ai[rip[k]]; 1625 j = ai[rip[k]]; 1626 while (nz--){ 1627 vj = rip[aj[j++]]; 1628 if (vj > k){ 1629 qm = k; 1630 do { 1631 m = qm; qm = q[m]; 1632 } while(qm < vj); 1633 if (qm == vj) { 1634 SETERRQ(1," error: duplicate entry in A\n"); 1635 } 1636 nzk++; 1637 q[m] = vj; 1638 q[vj] = qm; 1639 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1640 } 1641 } 1642 1643 /* modify nonzero structure of k-th row by computing fill-in 1644 for each row prow to be merged in */ 1645 prow = k; 1646 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 1647 1648 while (prow < k){ 1649 /* merge row prow into k-th row */ 1650 jmin = iu[prow] + 1; 1651 jmax = iu[prow+1]; 1652 qm = k; 1653 for (j=jmin; j<jmax; j++){ 1654 incrlev = lev[j-shift] + 1; 1655 if (incrlev > levels) continue; 1656 1657 vj = ju[j]; 1658 do { 1659 m = qm; qm = q[m]; 1660 } while (qm < vj); 1661 if (qm != vj){ /* a fill */ 1662 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1663 levtmp[vj] = incrlev; 1664 } else { 1665 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1666 } 1667 } 1668 prow = prowl[prow]; /* next pivot row */ 1669 } 1670 1671 /* add k to row list for first nonzero element in k-th row */ 1672 if (nzk > 1){ 1673 i = q[k]; /* col value of first nonzero element in k_th row of U */ 1674 prowl[k] = prowl[i]; prowl[i] = k; 1675 } 1676 iu[k+1] = iu[k] + nzk; 1677 1678 /* allocate more space to ju and lev if needed */ 1679 if (iu[k+1] > umax) { 1680 /* estimate how much additional space we will need */ 1681 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1682 /* just double the memory each time */ 1683 maxadd = umax; 1684 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 1685 umax += maxadd; 1686 1687 /* allocate a longer ju */ 1688 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1689 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 1690 ierr = PetscFree(ju);CHKERRQ(ierr); 1691 ju = jutmp; 1692 1693 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1694 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(int));CHKERRQ(ierr); 1695 ierr = PetscFree(lev);CHKERRQ(ierr); 1696 lev = jutmp; 1697 realloc += 2; /* count how many times we realloc */ 1698 } 1699 1700 /* save nonzero structure of k-th row in ju */ 1701 i=k; 1702 while (nzk --) { 1703 i = q[i]; 1704 ju[juidx] = i; 1705 lev[juidx-shift] = levtmp[i]; 1706 juidx++; 1707 } 1708 } 1709 1710 if (ai[mbs] != 0) { 1711 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1712 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1713 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Run with -pc_icc_fill %g or use \n",af); 1714 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:PCICCSetFill(pc,%g);\n",af); 1715 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:for best performance.\n"); 1716 } else { 1717 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 1718 } 1719 1720 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1721 ierr = PetscFree(prowl);CHKERRQ(ierr); 1722 ierr = PetscFree(lev);CHKERRQ(ierr); 1723 1724 /* put together the new matrix */ 1725 ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr); 1726 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 1727 ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 1728 1729 /* PetscLogObjectParent(*B,iperm); */ 1730 b = (Mat_SeqSBAIJ*)(*B)->data; 1731 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1732 b->singlemalloc = PETSC_FALSE; 1733 /* the next line frees the default space generated by the Create() */ 1734 ierr = PetscFree(b->a);CHKERRQ(ierr); 1735 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1736 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 1737 b->j = ju; 1738 b->i = iu; 1739 b->diag = 0; 1740 b->ilen = 0; 1741 b->imax = 0; 1742 1743 if (b->row) { 1744 ierr = ISDestroy(b->row);CHKERRQ(ierr); 1745 } 1746 if (b->icol) { 1747 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 1748 } 1749 b->row = perm; 1750 b->icol = perm; 1751 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1752 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1753 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1754 /* In b structure: Free imax, ilen, old a, old j. 1755 Allocate idnew, solve_work, new a, new j */ 1756 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 1757 b->maxnz = b->nz = iu[mbs]; 1758 1759 (*B)->factor = FACTOR_CHOLESKY; 1760 (*B)->info.factor_mallocs = realloc; 1761 (*B)->info.fill_ratio_given = f; 1762 if (ai[mbs] != 0) { 1763 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1764 } else { 1765 (*B)->info.fill_ratio_needed = 0.0; 1766 } 1767 1768 if (perm_identity){ 1769 switch (bs) { 1770 case 1: 1771 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1772 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1773 (*B)->ops->solves = MatSolves_SeqSBAIJ_1; 1774 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n"); 1775 break; 1776 case 2: 1777 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1778 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1779 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1780 break; 1781 case 3: 1782 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1783 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1784 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 1785 break; 1786 case 4: 1787 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1788 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1789 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1790 break; 1791 case 5: 1792 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1793 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1794 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1795 break; 1796 case 6: 1797 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1798 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1799 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1800 break; 1801 case 7: 1802 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1803 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1804 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1805 break; 1806 default: 1807 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 1808 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 1809 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 1810 break; 1811 } 1812 } 1813 1814 PetscFunctionReturn(0); 1815 } 1816 1817 1818 1819