1 /*$Id: sbaijfact2.c,v 1.2.1.41 2001/08/07 03:03:01 balay Exp $*/ 2 /* 3 Factorization code for SBAIJ format. 4 */ 5 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/ilu.h" 10 #include "src/inline/dot.h" 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_1_NaturalOrdering" 14 int MatSolveTranspose_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 15 { 16 PetscFunctionBegin; 17 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 18 } 19 20 #undef __FUNCT__ 21 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_2_NaturalOrdering" 22 int MatSolveTranspose_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 23 { 24 PetscFunctionBegin; 25 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 26 } 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_3_NaturalOrdering" 30 int MatSolveTranspose_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 31 { 32 PetscFunctionBegin; 33 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 34 } 35 36 #undef __FUNCT__ 37 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_4_NaturalOrdering" 38 int MatSolveTranspose_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 39 { 40 PetscFunctionBegin; 41 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_5_NaturalOrdering" 46 int MatSolveTranspose_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 47 { 48 PetscFunctionBegin; 49 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 50 } 51 52 #undef __FUNCT__ 53 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_6_NaturalOrdering" 54 int MatSolveTranspose_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 55 { 56 PetscFunctionBegin; 57 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 58 } 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_7_NaturalOrdering" 62 int MatSolveTranspose_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 63 { 64 PetscFunctionBegin; 65 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 66 } 67 68 #undef __FUNCT__ 69 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_1" 70 int MatSolveTranspose_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 71 { 72 PetscFunctionBegin; 73 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 74 } 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_2" 78 int MatSolveTranspose_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 79 { 80 PetscFunctionBegin; 81 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 82 } 83 84 #undef __FUNCT__ 85 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_3" 86 int MatSolveTranspose_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 87 { 88 PetscFunctionBegin; 89 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 90 } 91 92 #undef __FUNCT__ 93 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_4" 94 int MatSolveTranspose_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 95 { 96 PetscFunctionBegin; 97 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 98 } 99 100 #undef __FUNCT__ 101 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_5" 102 int MatSolveTranspose_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 103 { 104 PetscFunctionBegin; 105 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_6" 110 int MatSolveTranspose_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 111 { 112 PetscFunctionBegin; 113 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 114 } 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_7" 118 int MatSolveTranspose_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 119 { 120 PetscFunctionBegin; 121 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatSolve_SeqSBAIJ_N" 126 int MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 127 { 128 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 129 IS isrow=a->row; 130 int mbs=a->mbs,*ai=a->i,*aj=a->j; 131 int nz,*vj,k,ierr,*r,idx,k1; 132 int bs=a->bs,bs2 = a->bs2; 133 MatScalar *aa=a->a,*v,*diag; 134 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 135 136 PetscFunctionBegin; 137 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 138 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 139 t = a->solve_work; 140 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 141 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 142 143 /* solve U^T * D * y = b by forward substitution */ 144 xk = t; 145 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 146 idx = bs*r[k]; 147 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 148 } 149 for (k=0; k<mbs; k++){ 150 v = aa + bs2*ai[k]; 151 xk = t + k*bs; /* Dk*xk = k-th block of x */ 152 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 153 nz = ai[k+1] - ai[k]; 154 vj = aj + ai[k]; 155 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 156 while (nz--) { 157 /* x(:) += U(k,:)^T*(Dk*xk) */ 158 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 159 vj++; xj = t + (*vj)*bs; 160 v += bs2; 161 } 162 /* xk = inv(Dk)*(Dk*xk) */ 163 diag = aa+k*bs2; /* ptr to inv(Dk) */ 164 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 165 } 166 167 /* solve U*x = y by back substitution */ 168 for (k=mbs-1; k>=0; k--){ 169 v = aa + bs2*ai[k]; 170 xk = t + k*bs; /* xk */ 171 nz = ai[k+1] - ai[k]; 172 vj = aj + ai[k]; 173 xj = t + (*vj)*bs; 174 while (nz--) { 175 /* xk += U(k,:)*x(:) */ 176 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 177 vj++; 178 v += bs2; xj = t + (*vj)*bs; 179 } 180 idx = bs*r[k]; 181 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 182 } 183 184 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 185 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 186 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 187 PetscLogFlops(bs2*(2*a->s_nz + mbs)); 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering" 193 int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 194 { 195 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 196 int mbs=a->mbs,*ai=a->i,*aj=a->j; 197 int nz,*vj,k,ierr; 198 int bs=a->bs,bs2 = a->bs2; 199 MatScalar *aa=a->a,*v,*diag; 200 PetscScalar *x,*xk,*xj,*b,*xk_tmp; 201 202 PetscFunctionBegin; 203 204 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 205 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 206 207 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 208 209 /* solve U^T * D * y = b by forward substitution */ 210 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 211 for (k=0; k<mbs; k++){ 212 v = aa + bs2*ai[k]; 213 xk = x + k*bs; /* Dk*xk = k-th block of x */ 214 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 215 nz = ai[k+1] - ai[k]; 216 vj = aj + ai[k]; 217 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 218 while (nz--) { 219 /* x(:) += U(k,:)^T*(Dk*xk) */ 220 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 221 vj++; xj = x + (*vj)*bs; 222 v += bs2; 223 } 224 /* xk = inv(Dk)*(Dk*xk) */ 225 diag = aa+k*bs2; /* ptr to inv(Dk) */ 226 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 227 } 228 229 /* solve U*x = y by back substitution */ 230 for (k=mbs-1; k>=0; k--){ 231 v = aa + bs2*ai[k]; 232 xk = x + k*bs; /* xk */ 233 nz = ai[k+1] - ai[k]; 234 vj = aj + ai[k]; 235 xj = x + (*vj)*bs; 236 while (nz--) { 237 /* xk += U(k,:)*x(:) */ 238 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 239 vj++; 240 v += bs2; xj = x + (*vj)*bs; 241 } 242 } 243 244 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 245 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 246 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 247 PetscLogFlops(bs2*(2*a->s_nz + mbs)); 248 PetscFunctionReturn(0); 249 } 250 251 #undef __FUNCT__ 252 #define __FUNCT__ "MatSolve_SeqSBAIJ_7" 253 int MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 254 { 255 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 256 IS isrow=a->row; 257 int mbs=a->mbs,*ai=a->i,*aj=a->j; 258 int nz,*vj,k,ierr,*r,idx; 259 MatScalar *aa=a->a,*v,*d; 260 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 261 262 PetscFunctionBegin; 263 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 264 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 265 t = a->solve_work; 266 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 267 268 /* solve U^T * D * y = b by forward substitution */ 269 tp = t; 270 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 271 idx = 7*r[k]; 272 tp[0] = b[idx]; 273 tp[1] = b[idx+1]; 274 tp[2] = b[idx+2]; 275 tp[3] = b[idx+3]; 276 tp[4] = b[idx+4]; 277 tp[5] = b[idx+5]; 278 tp[6] = b[idx+6]; 279 tp += 7; 280 } 281 282 for (k=0; k<mbs; k++){ 283 v = aa + 49*ai[k]; 284 vj = aj + ai[k]; 285 tp = t + k*7; 286 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 287 nz = ai[k+1] - ai[k]; 288 tp = t + (*vj)*7; 289 while (nz--) { 290 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 291 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 292 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 293 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 294 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 295 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 296 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 297 vj++; tp = t + (*vj)*7; 298 v += 49; 299 } 300 301 /* xk = inv(Dk)*(Dk*xk) */ 302 d = aa+k*49; /* ptr to inv(Dk) */ 303 tp = t + k*7; 304 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 305 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 306 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 307 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 308 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 309 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 310 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 311 } 312 313 /* solve U*x = y by back substitution */ 314 for (k=mbs-1; k>=0; k--){ 315 v = aa + 49*ai[k]; 316 vj = aj + ai[k]; 317 tp = t + k*7; 318 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 319 nz = ai[k+1] - ai[k]; 320 321 tp = t + (*vj)*7; 322 while (nz--) { 323 /* xk += U(k,:)*x(:) */ 324 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 325 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 326 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 327 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 328 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 329 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 330 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 331 vj++; tp = t + (*vj)*7; 332 v += 49; 333 } 334 tp = t + k*7; 335 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 336 idx = 7*r[k]; 337 x[idx] = x0; 338 x[idx+1] = x1; 339 x[idx+2] = x2; 340 x[idx+3] = x3; 341 x[idx+4] = x4; 342 x[idx+5] = x5; 343 x[idx+6] = x6; 344 } 345 346 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 347 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 348 PetscLogFlops(49*(2*a->s_nz + mbs)); 349 PetscFunctionReturn(0); 350 } 351 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering" 354 int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 355 { 356 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 357 int mbs=a->mbs,*ai=a->i,*aj=a->j; 358 MatScalar *aa=a->a,*v,*d; 359 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4,x5,x6; 360 int nz,*vj,k,ierr; 361 362 PetscFunctionBegin; 363 364 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 365 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 366 367 /* solve U^T * D * y = b by forward substitution */ 368 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 369 for (k=0; k<mbs; k++){ 370 v = aa + 49*ai[k]; 371 xp = x + k*7; 372 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 373 nz = ai[k+1] - ai[k]; 374 vj = aj + ai[k]; 375 xp = x + (*vj)*7; 376 while (nz--) { 377 /* x(:) += U(k,:)^T*(Dk*xk) */ 378 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 379 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 380 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 381 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 382 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 383 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 384 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 385 vj++; xp = x + (*vj)*7; 386 v += 49; 387 } 388 /* xk = inv(Dk)*(Dk*xk) */ 389 d = aa+k*49; /* ptr to inv(Dk) */ 390 xp = x + k*7; 391 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 392 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 393 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 394 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 395 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 396 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 397 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 398 } 399 400 /* solve U*x = y by back substitution */ 401 for (k=mbs-1; k>=0; k--){ 402 v = aa + 49*ai[k]; 403 xp = x + k*7; 404 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 405 nz = ai[k+1] - ai[k]; 406 vj = aj + ai[k]; 407 xp = x + (*vj)*7; 408 while (nz--) { 409 /* xk += U(k,:)*x(:) */ 410 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 411 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 412 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 413 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 414 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 415 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 416 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 417 vj++; 418 v += 49; xp = x + (*vj)*7; 419 } 420 xp = x + k*7; 421 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 422 } 423 424 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 425 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 426 PetscLogFlops(49*(2*a->s_nz + mbs)); 427 PetscFunctionReturn(0); 428 } 429 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatSolve_SeqSBAIJ_6" 432 int MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 433 { 434 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 435 IS isrow=a->row; 436 int mbs=a->mbs,*ai=a->i,*aj=a->j; 437 int nz,*vj,k,ierr,*r,idx; 438 MatScalar *aa=a->a,*v,*d; 439 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 440 441 PetscFunctionBegin; 442 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 443 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 444 t = a->solve_work; 445 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 446 447 /* solve U^T * D * y = b by forward substitution */ 448 tp = t; 449 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 450 idx = 6*r[k]; 451 tp[0] = b[idx]; 452 tp[1] = b[idx+1]; 453 tp[2] = b[idx+2]; 454 tp[3] = b[idx+3]; 455 tp[4] = b[idx+4]; 456 tp[5] = b[idx+5]; 457 tp += 6; 458 } 459 460 for (k=0; k<mbs; k++){ 461 v = aa + 36*ai[k]; 462 vj = aj + ai[k]; 463 tp = t + k*6; 464 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 465 nz = ai[k+1] - ai[k]; 466 tp = t + (*vj)*6; 467 while (nz--) { 468 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 469 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 470 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 471 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 472 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 473 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 474 vj++; tp = t + (*vj)*6; 475 v += 36; 476 } 477 478 /* xk = inv(Dk)*(Dk*xk) */ 479 d = aa+k*36; /* ptr to inv(Dk) */ 480 tp = t + k*6; 481 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 482 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 483 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 484 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 485 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 486 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 487 } 488 489 /* solve U*x = y by back substitution */ 490 for (k=mbs-1; k>=0; k--){ 491 v = aa + 36*ai[k]; 492 vj = aj + ai[k]; 493 tp = t + k*6; 494 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 495 nz = ai[k+1] - ai[k]; 496 497 tp = t + (*vj)*6; 498 while (nz--) { 499 /* xk += U(k,:)*x(:) */ 500 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 501 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 502 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 503 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 504 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 505 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 506 vj++; tp = t + (*vj)*6; 507 v += 36; 508 } 509 tp = t + k*6; 510 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 511 idx = 6*r[k]; 512 x[idx] = x0; 513 x[idx+1] = x1; 514 x[idx+2] = x2; 515 x[idx+3] = x3; 516 x[idx+4] = x4; 517 x[idx+5] = x5; 518 } 519 520 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 521 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 522 PetscLogFlops(36*(2*a->s_nz + mbs)); 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering" 528 int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 529 { 530 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 531 int mbs=a->mbs,*ai=a->i,*aj=a->j; 532 MatScalar *aa=a->a,*v,*d; 533 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4,x5; 534 int nz,*vj,k,ierr; 535 536 PetscFunctionBegin; 537 538 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 539 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 540 541 /* solve U^T * D * y = b by forward substitution */ 542 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 543 for (k=0; k<mbs; k++){ 544 v = aa + 36*ai[k]; 545 xp = x + k*6; 546 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 547 nz = ai[k+1] - ai[k]; 548 vj = aj + ai[k]; 549 xp = x + (*vj)*6; 550 while (nz--) { 551 /* x(:) += U(k,:)^T*(Dk*xk) */ 552 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 553 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 554 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 555 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 556 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 557 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 558 vj++; xp = x + (*vj)*6; 559 v += 36; 560 } 561 /* xk = inv(Dk)*(Dk*xk) */ 562 d = aa+k*36; /* ptr to inv(Dk) */ 563 xp = x + k*6; 564 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 565 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 566 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 567 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 568 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 569 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 570 } 571 572 /* solve U*x = y by back substitution */ 573 for (k=mbs-1; k>=0; k--){ 574 v = aa + 36*ai[k]; 575 xp = x + k*6; 576 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 577 nz = ai[k+1] - ai[k]; 578 vj = aj + ai[k]; 579 xp = x + (*vj)*6; 580 while (nz--) { 581 /* xk += U(k,:)*x(:) */ 582 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 583 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 584 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 585 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 586 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 587 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 588 vj++; 589 v += 36; xp = x + (*vj)*6; 590 } 591 xp = x + k*6; 592 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 593 } 594 595 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 596 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 597 PetscLogFlops(36*(2*a->s_nz + mbs)); 598 PetscFunctionReturn(0); 599 } 600 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatSolve_SeqSBAIJ_5" 603 int MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 604 { 605 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 606 IS isrow=a->row; 607 int mbs=a->mbs,*ai=a->i,*aj=a->j; 608 int nz,*vj,k,ierr,*r,idx; 609 MatScalar *aa=a->a,*v,*diag; 610 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 611 612 PetscFunctionBegin; 613 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 614 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 615 t = a->solve_work; 616 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 617 618 /* solve U^T * D * y = b by forward substitution */ 619 tp = t; 620 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 621 idx = 5*r[k]; 622 tp[0] = b[idx]; 623 tp[1] = b[idx+1]; 624 tp[2] = b[idx+2]; 625 tp[3] = b[idx+3]; 626 tp[4] = b[idx+4]; 627 tp += 5; 628 } 629 630 for (k=0; k<mbs; k++){ 631 v = aa + 25*ai[k]; 632 vj = aj + ai[k]; 633 tp = t + k*5; 634 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 635 nz = ai[k+1] - ai[k]; 636 637 tp = t + (*vj)*5; 638 while (nz--) { 639 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 640 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 641 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 642 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 643 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 644 vj++; tp = t + (*vj)*5; 645 v += 25; 646 } 647 648 /* xk = inv(Dk)*(Dk*xk) */ 649 diag = aa+k*25; /* ptr to inv(Dk) */ 650 tp = t + k*5; 651 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 652 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 653 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 654 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 655 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 656 } 657 658 /* solve U*x = y by back substitution */ 659 for (k=mbs-1; k>=0; k--){ 660 v = aa + 25*ai[k]; 661 vj = aj + ai[k]; 662 tp = t + k*5; 663 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 664 nz = ai[k+1] - ai[k]; 665 666 tp = t + (*vj)*5; 667 while (nz--) { 668 /* xk += U(k,:)*x(:) */ 669 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 670 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 671 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 672 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 673 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 674 vj++; tp = t + (*vj)*5; 675 v += 25; 676 } 677 tp = t + k*5; 678 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 679 idx = 5*r[k]; 680 x[idx] = x0; 681 x[idx+1] = x1; 682 x[idx+2] = x2; 683 x[idx+3] = x3; 684 x[idx+4] = x4; 685 } 686 687 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 688 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 689 PetscLogFlops(25*(2*a->s_nz + mbs)); 690 PetscFunctionReturn(0); 691 } 692 693 #undef __FUNCT__ 694 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering" 695 int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 696 { 697 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 698 int mbs=a->mbs,*ai=a->i,*aj=a->j; 699 MatScalar *aa=a->a,*v,*diag; 700 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4; 701 int nz,*vj,k,ierr; 702 703 PetscFunctionBegin; 704 705 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 706 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 707 708 /* solve U^T * D * y = b by forward substitution */ 709 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 710 for (k=0; k<mbs; k++){ 711 v = aa + 25*ai[k]; 712 xp = x + k*5; 713 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 714 nz = ai[k+1] - ai[k]; 715 vj = aj + ai[k]; 716 xp = x + (*vj)*5; 717 while (nz--) { 718 /* x(:) += U(k,:)^T*(Dk*xk) */ 719 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 720 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 721 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 722 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 723 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 724 vj++; xp = x + (*vj)*5; 725 v += 25; 726 } 727 /* xk = inv(Dk)*(Dk*xk) */ 728 diag = aa+k*25; /* ptr to inv(Dk) */ 729 xp = x + k*5; 730 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 731 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 732 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 733 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 734 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 735 } 736 737 /* solve U*x = y by back substitution */ 738 for (k=mbs-1; k>=0; k--){ 739 v = aa + 25*ai[k]; 740 xp = x + k*5; 741 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 742 nz = ai[k+1] - ai[k]; 743 vj = aj + ai[k]; 744 xp = x + (*vj)*5; 745 while (nz--) { 746 /* xk += U(k,:)*x(:) */ 747 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 748 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 749 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 750 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 751 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 752 vj++; 753 v += 25; xp = x + (*vj)*5; 754 } 755 xp = x + k*5; 756 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 757 } 758 759 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 760 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 761 PetscLogFlops(25*(2*a->s_nz + mbs)); 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNCT__ 766 #define __FUNCT__ "MatSolve_SeqSBAIJ_4" 767 int MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 768 { 769 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 770 IS isrow=a->row; 771 int mbs=a->mbs,*ai=a->i,*aj=a->j; 772 int nz,*vj,k,ierr,*r,idx; 773 MatScalar *aa=a->a,*v,*diag; 774 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 775 776 PetscFunctionBegin; 777 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 778 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 779 t = a->solve_work; 780 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 781 782 /* solve U^T * D * y = b by forward substitution */ 783 tp = t; 784 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 785 idx = 4*r[k]; 786 tp[0] = b[idx]; 787 tp[1] = b[idx+1]; 788 tp[2] = b[idx+2]; 789 tp[3] = b[idx+3]; 790 tp += 4; 791 } 792 793 for (k=0; k<mbs; k++){ 794 v = aa + 16*ai[k]; 795 vj = aj + ai[k]; 796 tp = t + k*4; 797 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 798 nz = ai[k+1] - ai[k]; 799 800 tp = t + (*vj)*4; 801 while (nz--) { 802 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 803 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 804 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 805 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 806 vj++; tp = t + (*vj)*4; 807 v += 16; 808 } 809 810 /* xk = inv(Dk)*(Dk*xk) */ 811 diag = aa+k*16; /* ptr to inv(Dk) */ 812 tp = t + k*4; 813 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 814 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 815 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 816 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 817 } 818 819 /* solve U*x = y by back substitution */ 820 for (k=mbs-1; k>=0; k--){ 821 v = aa + 16*ai[k]; 822 vj = aj + ai[k]; 823 tp = t + k*4; 824 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 825 nz = ai[k+1] - ai[k]; 826 827 tp = t + (*vj)*4; 828 while (nz--) { 829 /* xk += U(k,:)*x(:) */ 830 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 831 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 832 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 833 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 834 vj++; tp = t + (*vj)*4; 835 v += 16; 836 } 837 tp = t + k*4; 838 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 839 idx = 4*r[k]; 840 x[idx] = x0; 841 x[idx+1] = x1; 842 x[idx+2] = x2; 843 x[idx+3] = x3; 844 } 845 846 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 847 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 848 PetscLogFlops(16*(2*a->s_nz + mbs)); 849 PetscFunctionReturn(0); 850 } 851 852 /* 853 Special case where the matrix was factored in the natural ordering. 854 This eliminates the need for the column and row permutation. 855 */ 856 #undef __FUNCT__ 857 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering" 858 int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 859 { 860 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 861 int mbs=a->mbs,*ai=a->i,*aj=a->j; 862 MatScalar *aa=a->a,*v,*diag; 863 PetscScalar *x,*xp,*b,x0,x1,x2,x3; 864 int nz,*vj,k,ierr; 865 866 PetscFunctionBegin; 867 868 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 869 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 870 871 /* solve U^T * D * y = b by forward substitution */ 872 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 873 for (k=0; k<mbs; k++){ 874 v = aa + 16*ai[k]; 875 xp = x + k*4; 876 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 877 nz = ai[k+1] - ai[k]; 878 vj = aj + ai[k]; 879 xp = x + (*vj)*4; 880 while (nz--) { 881 /* x(:) += U(k,:)^T*(Dk*xk) */ 882 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 883 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 884 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 885 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 886 vj++; xp = x + (*vj)*4; 887 v += 16; 888 } 889 /* xk = inv(Dk)*(Dk*xk) */ 890 diag = aa+k*16; /* ptr to inv(Dk) */ 891 xp = x + k*4; 892 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 893 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 894 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 895 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 896 } 897 898 /* solve U*x = y by back substitution */ 899 for (k=mbs-1; k>=0; k--){ 900 v = aa + 16*ai[k]; 901 xp = x + k*4; 902 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 903 nz = ai[k+1] - ai[k]; 904 vj = aj + ai[k]; 905 xp = x + (*vj)*4; 906 while (nz--) { 907 /* xk += U(k,:)*x(:) */ 908 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 909 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 910 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 911 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 912 vj++; 913 v += 16; xp = x + (*vj)*4; 914 } 915 xp = x + k*4; 916 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 917 } 918 919 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 920 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 921 PetscLogFlops(16*(2*a->s_nz + mbs)); 922 PetscFunctionReturn(0); 923 } 924 925 #undef __FUNCT__ 926 #define __FUNCT__ "MatSolve_SeqSBAIJ_3" 927 int MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 928 { 929 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 930 IS isrow=a->row; 931 int mbs=a->mbs,*ai=a->i,*aj=a->j; 932 int nz,*vj,k,ierr,*r,idx; 933 MatScalar *aa=a->a,*v,*diag; 934 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 935 936 PetscFunctionBegin; 937 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 938 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 939 t = a->solve_work; 940 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 941 942 /* solve U^T * D * y = b by forward substitution */ 943 tp = t; 944 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 945 idx = 3*r[k]; 946 tp[0] = b[idx]; 947 tp[1] = b[idx+1]; 948 tp[2] = b[idx+2]; 949 tp += 3; 950 } 951 952 for (k=0; k<mbs; k++){ 953 v = aa + 9*ai[k]; 954 vj = aj + ai[k]; 955 tp = t + k*3; 956 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 957 nz = ai[k+1] - ai[k]; 958 959 tp = t + (*vj)*3; 960 while (nz--) { 961 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 962 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 963 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 964 vj++; tp = t + (*vj)*3; 965 v += 9; 966 } 967 968 /* xk = inv(Dk)*(Dk*xk) */ 969 diag = aa+k*9; /* ptr to inv(Dk) */ 970 tp = t + k*3; 971 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 972 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 973 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 974 } 975 976 /* solve U*x = y by back substitution */ 977 for (k=mbs-1; k>=0; k--){ 978 v = aa + 9*ai[k]; 979 vj = aj + ai[k]; 980 tp = t + k*3; 981 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 982 nz = ai[k+1] - ai[k]; 983 984 tp = t + (*vj)*3; 985 while (nz--) { 986 /* xk += U(k,:)*x(:) */ 987 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 988 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 989 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 990 vj++; tp = t + (*vj)*3; 991 v += 9; 992 } 993 tp = t + k*3; 994 tp[0] = x0; tp[1] = x1; tp[2] = x2; 995 idx = 3*r[k]; 996 x[idx] = x0; 997 x[idx+1] = x1; 998 x[idx+2] = x2; 999 } 1000 1001 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1002 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1003 PetscLogFlops(9*(2*a->s_nz + mbs)); 1004 PetscFunctionReturn(0); 1005 } 1006 1007 /* 1008 Special case where the matrix was factored in the natural ordering. 1009 This eliminates the need for the column and row permutation. 1010 */ 1011 #undef __FUNCT__ 1012 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering" 1013 int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1014 { 1015 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1016 int mbs=a->mbs,*ai=a->i,*aj=a->j; 1017 MatScalar *aa=a->a,*v,*diag; 1018 PetscScalar *x,*xp,*b,x0,x1,x2; 1019 int nz,*vj,k,ierr; 1020 1021 PetscFunctionBegin; 1022 1023 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1024 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1025 1026 /* solve U^T * D * y = b by forward substitution */ 1027 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1028 for (k=0; k<mbs; k++){ 1029 v = aa + 9*ai[k]; 1030 xp = x + k*3; 1031 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 1032 nz = ai[k+1] - ai[k]; 1033 vj = aj + ai[k]; 1034 xp = x + (*vj)*3; 1035 while (nz--) { 1036 /* x(:) += U(k,:)^T*(Dk*xk) */ 1037 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1038 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1039 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1040 vj++; xp = x + (*vj)*3; 1041 v += 9; 1042 } 1043 /* xk = inv(Dk)*(Dk*xk) */ 1044 diag = aa+k*9; /* ptr to inv(Dk) */ 1045 xp = x + k*3; 1046 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1047 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1048 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1049 } 1050 1051 /* solve U*x = y by back substitution */ 1052 for (k=mbs-1; k>=0; k--){ 1053 v = aa + 9*ai[k]; 1054 xp = x + k*3; 1055 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 1056 nz = ai[k+1] - ai[k]; 1057 vj = aj + ai[k]; 1058 xp = x + (*vj)*3; 1059 while (nz--) { 1060 /* xk += U(k,:)*x(:) */ 1061 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 1062 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 1063 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 1064 vj++; 1065 v += 9; xp = x + (*vj)*3; 1066 } 1067 xp = x + k*3; 1068 xp[0] = x0; xp[1] = x1; xp[2] = x2; 1069 } 1070 1071 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1072 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1073 PetscLogFlops(9*(2*a->s_nz + mbs)); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatSolve_SeqSBAIJ_2" 1079 int MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 1080 { 1081 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 1082 IS isrow=a->row; 1083 int mbs=a->mbs,*ai=a->i,*aj=a->j; 1084 int nz,*vj,k,k2,ierr,*r,idx; 1085 MatScalar *aa=a->a,*v,*diag; 1086 PetscScalar *x,*b,x0,x1,*t; 1087 1088 PetscFunctionBegin; 1089 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1090 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1091 t = a->solve_work; 1092 /* printf("called MatSolve_SeqSBAIJ_2\n"); */ 1093 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1094 1095 /* solve U^T * D * y = perm(b) by forward substitution */ 1096 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1097 idx = 2*r[k]; 1098 t[k*2] = b[idx]; 1099 t[k*2+1] = b[idx+1]; 1100 } 1101 for (k=0; k<mbs; k++){ 1102 v = aa + 4*ai[k]; 1103 vj = aj + ai[k]; 1104 k2 = k*2; 1105 x0 = t[k2]; x1 = t[k2+1]; 1106 nz = ai[k+1] - ai[k]; 1107 while (nz--) { 1108 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1109 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1110 vj++; v += 4; 1111 } 1112 diag = aa+k*4; /* ptr to inv(Dk) */ 1113 t[k2] = diag[0]*x0 + diag[2]*x1; 1114 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1115 } 1116 1117 /* solve U*x = y by back substitution */ 1118 for (k=mbs-1; k>=0; k--){ 1119 v = aa + 4*ai[k]; 1120 vj = aj + ai[k]; 1121 k2 = k*2; 1122 x0 = t[k2]; x1 = t[k2+1]; 1123 nz = ai[k+1] - ai[k]; 1124 while (nz--) { 1125 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1126 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1127 vj++; v += 4; 1128 } 1129 t[k2] = x0; 1130 t[k2+1] = x1; 1131 idx = 2*r[k]; 1132 x[idx] = x0; 1133 x[idx+1] = x1; 1134 } 1135 1136 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1137 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1138 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1139 PetscLogFlops(4*(2*a->s_nz + mbs)); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 /* 1144 Special case where the matrix was factored in the natural ordering. 1145 This eliminates the need for the column and row permutation. 1146 */ 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering" 1149 int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1150 { 1151 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1152 int mbs=a->mbs,*ai=a->i,*aj=a->j; 1153 MatScalar *aa=a->a,*v,*diag; 1154 PetscScalar *x,*b,x0,x1; 1155 int nz,*vj,k,k2,ierr; 1156 1157 PetscFunctionBegin; 1158 1159 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1160 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1161 1162 /* solve U^T * D * y = b by forward substitution */ 1163 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1164 for (k=0; k<mbs; k++){ 1165 v = aa + 4*ai[k]; 1166 vj = aj + ai[k]; 1167 k2 = k*2; 1168 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1169 nz = ai[k+1] - ai[k]; 1170 1171 while (nz--) { 1172 /* x(:) += U(k,:)^T*(Dk*xk) */ 1173 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1174 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1175 vj++; v += 4; 1176 } 1177 /* xk = inv(Dk)*(Dk*xk) */ 1178 diag = aa+k*4; /* ptr to inv(Dk) */ 1179 x[k2] = diag[0]*x0 + diag[2]*x1; 1180 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1181 } 1182 1183 /* solve U*x = y by back substitution */ 1184 for (k=mbs-1; k>=0; k--){ 1185 v = aa + 4*ai[k]; 1186 vj = aj + ai[k]; 1187 k2 = k*2; 1188 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1189 nz = ai[k+1] - ai[k]; 1190 while (nz--) { 1191 /* xk += U(k,:)*x(:) */ 1192 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1193 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1194 vj++; v += 4; 1195 } 1196 x[k2] = x0; 1197 x[k2+1] = x1; 1198 } 1199 1200 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1201 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1202 PetscLogFlops(4*(2*a->s_nz + mbs)); /* bs2*(2*a->s_nz + mbs) */ 1203 PetscFunctionReturn(0); 1204 } 1205 1206 #undef __FUNCT__ 1207 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1208 int MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1209 { 1210 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1211 IS isrow=a->row; 1212 int mbs=a->mbs,*ai=a->i,*aj=a->j,ierr,*rip; 1213 MatScalar *aa=a->a,*v; 1214 PetscScalar *x,*b,xk,*t; 1215 int nz,*vj,k; 1216 1217 PetscFunctionBegin; 1218 if (!mbs) PetscFunctionReturn(0); 1219 1220 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1221 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1222 t = a->solve_work; 1223 1224 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1225 1226 /* solve U^T*D*y = perm(b) by forward substitution */ 1227 for (k=0; k<mbs; k++) t[k] = b[rip[k]]; 1228 for (k=0; k<mbs; k++){ 1229 v = aa + ai[k]; 1230 vj = aj + ai[k]; 1231 xk = t[k]; 1232 nz = ai[k+1] - ai[k]; 1233 while (nz--) t[*vj++] += (*v++) * xk; 1234 t[k] = xk*aa[k]; /* note: aa[k] = 1/D(k) */ 1235 } 1236 1237 /* solve U*x = y by back substitution */ 1238 for (k=mbs-1; k>=0; k--){ 1239 v = aa + ai[k]; 1240 vj = aj + ai[k]; 1241 xk = t[k]; 1242 nz = ai[k+1] - ai[k]; 1243 while (nz--) xk += (*v++) * t[*vj++]; 1244 t[k] = xk; 1245 x[rip[k]] = xk; 1246 } 1247 1248 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1249 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1250 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1251 PetscLogFlops(4*a->s_nz + A->m); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1257 int MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1258 { 1259 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1260 int ierr; 1261 1262 PetscFunctionBegin; 1263 if (a->bs == 1) { 1264 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1265 } else { 1266 IS isrow=a->row; 1267 int mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i; 1268 MatScalar *aa=a->a,*v; 1269 PetscScalar *x,*b,*t; 1270 int nz,*vj,k,n; 1271 if (bb->n > a->solves_work_n) { 1272 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 1273 ierr = PetscMalloc(bb->n*A->m*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1274 a->solves_work_n = bb->n; 1275 } 1276 n = bb->n; 1277 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1278 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1279 t = a->solves_work; 1280 1281 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1282 1283 /* solve U^T*D*y = perm(b) by forward substitution */ 1284 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */ 1285 for (k=0; k<mbs; k++){ 1286 v = aa + ai[k]; 1287 vj = aj + ai[k]; 1288 nz = ai[k+1] - ai[k]; 1289 while (nz--) { 1290 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1291 v++;vj++; 1292 } 1293 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1294 } 1295 1296 /* solve U*x = y by back substitution */ 1297 for (k=mbs-1; k>=0; k--){ 1298 v = aa + ai[k]; 1299 vj = aj + ai[k]; 1300 nz = ai[k+1] - ai[k]; 1301 while (nz--) { 1302 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1303 v++;vj++; 1304 } 1305 for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i]; 1306 } 1307 1308 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1309 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1310 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1311 PetscLogFlops(bb->n*(4*a->s_nz + A->m)); 1312 } 1313 PetscFunctionReturn(0); 1314 } 1315 1316 /* 1317 Special case where the matrix was ILU(0) factored in the natural 1318 ordering. This eliminates the need for the column and row permutation. 1319 */ 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1322 int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1323 { 1324 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1325 int mbs=a->mbs,*ai=a->i,*aj=a->j,ierr; 1326 MatScalar *aa=a->a,*v; 1327 PetscScalar *x,*b,xk; 1328 int nz,*vj,k; 1329 1330 PetscFunctionBegin; 1331 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1332 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1333 1334 /* solve U^T*D*y = b by forward substitution */ 1335 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1336 for (k=0; k<mbs; k++){ 1337 v = aa + ai[k] + 1; 1338 vj = aj + ai[k] + 1; 1339 xk = x[k]; 1340 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1341 while (nz--) x[*vj++] += (*v++) * xk; 1342 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1343 } 1344 1345 /* solve U*x = y by back substitution */ 1346 for (k=mbs-2; k>=0; k--){ 1347 v = aa + ai[k] + 1; 1348 vj = aj + ai[k] + 1; 1349 xk = x[k]; 1350 nz = ai[k+1] - ai[k] - 1; 1351 while (nz--) xk += (*v++) * x[*vj++]; 1352 x[k] = xk; 1353 } 1354 1355 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1356 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1357 PetscLogFlops(4*a->s_nz + A->m); 1358 PetscFunctionReturn(0); 1359 } 1360 1361 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 1362 #undef __FUNCT__ 1363 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 1364 int MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B) 1365 { 1366 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 1367 int *rip,ierr,i,mbs = a->mbs,*ai = a->i,*aj = a->j; 1368 int *jutmp,bs = a->bs,bs2=a->bs2; 1369 int m,realloc = 0,*levtmp,levels = info->levels; 1370 int *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd,*jl; 1371 int incrlev,*lev,shift,prow,nz; 1372 int *il,ili,nextprow; 1373 PetscReal f = info->fill; 1374 PetscTruth perm_identity; 1375 1376 PetscFunctionBegin; 1377 /* check whether perm is the identity mapping */ 1378 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1379 1380 /* special case that simply copies fill pattern */ 1381 if (!levels && perm_identity && bs==1) { 1382 ierr = MatDuplicate_SeqSBAIJ(A,MAT_DO_NOT_COPY_VALUES,B);CHKERRQ(ierr); 1383 (*B)->factor = FACTOR_CHOLESKY; 1384 b = (Mat_SeqSBAIJ*)(*B)->data; 1385 b->row = perm; 1386 b->icol = perm; 1387 b->factor_damping = info->damping; 1388 b->factor_zeropivot = info->zeropivot; 1389 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1390 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1391 ierr = PetscMalloc(((*B)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1392 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1393 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1394 PetscFunctionReturn(0); 1395 } 1396 1397 /* --- inplace icc(levels), levels>0, ie, *B has same data structure as A --- */ 1398 if (levels > 0 && perm_identity && bs==1 ){ 1399 if (!perm_identity) a->permute = PETSC_TRUE; 1400 1401 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1402 1403 if (perm_identity){ /* without permutation */ 1404 ai = a->i; aj = a->j; 1405 } else { /* non-trivial permutation */ 1406 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 1407 ai = a->inew; aj = a->jnew; 1408 } 1409 1410 /* initialization */ 1411 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 1412 umax = (int)(f*ai[mbs] + 1); 1413 ierr = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr); 1414 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 1415 iu[0] = 0; 1416 juidx = 0; /* index for ju */ 1417 ierr = PetscMalloc((4*mbs+1)*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */ 1418 q = jl + mbs; /* linked list for col index of active row */ 1419 levtmp = q + mbs; 1420 il = levtmp + mbs; 1421 for (i=0; i<mbs; i++){ 1422 jl[i] = mbs; 1423 q[i] = 0; 1424 il[i] = 0; 1425 } 1426 1427 /* for each row k */ 1428 for (k=0; k<mbs; k++){ 1429 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 1430 q[k] = mbs; 1431 /* initialize nonzero structure of k-th row to row rip[k] of A */ 1432 jmin = ai[rip[k]] +1; /* exclude diag[k] */ 1433 jmax = ai[rip[k]+1]; 1434 for (j=jmin; j<jmax; j++){ 1435 vj = rip[aj[j]]; /* col. value */ 1436 if(vj > k){ 1437 qm = k; 1438 do { 1439 m = qm; qm = q[m]; 1440 } while(qm < vj); 1441 if (qm == vj) { 1442 SETERRQ(1," error: duplicate entry in A\n"); 1443 } 1444 nzk++; 1445 q[m] = vj; 1446 q[vj] = qm; 1447 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1448 } /* if(vj > k) */ 1449 } /* for (j=jmin; j<jmax; j++) */ 1450 1451 /* modify nonzero structure of k-th row by computing fill-in 1452 for each row i to be merged in */ 1453 prow = k; 1454 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 1455 1456 while (prow < k){ 1457 nextprow = jl[prow]; 1458 /* merge row prow into k-th row */ 1459 ili = il[prow]; 1460 jmin = ili + 1; /* points to 2nd nzero entry in U(prow,k:mbs-1) */ 1461 jmax = iu[prow+1]; 1462 qm = k; 1463 for (j=jmin; j<jmax; j++){ 1464 vj = ju[j]; 1465 incrlev = lev[j] + 1; 1466 if (incrlev > levels) continue; 1467 do { 1468 m = qm; qm = q[m]; 1469 } while (qm < vj); 1470 if (qm != vj){ /* a fill */ 1471 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1472 levtmp[vj] = incrlev; 1473 } else { 1474 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1475 } 1476 } 1477 if (jmin < jmax){ /* update il and jl */ 1478 il[prow] = jmin; 1479 j = ju[jmin]; 1480 jl[prow] = jl[j]; jl[j] = prow; 1481 } 1482 prow = nextprow; 1483 } 1484 1485 /* add the first nonzero element in U(k, k+1:mbs-1) to jl */ 1486 if (nzk > 0){ 1487 i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1488 jl[k] = jl[i]; jl[i] = k; 1489 il[k] = iu[k] + 1; 1490 } 1491 iu[k+1] = iu[k] + nzk + 1; /* include diag[k] */ 1492 1493 /* allocate more space to ju if needed */ 1494 if (iu[k+1] > umax) { 1495 /* estimate how much additional space we will need */ 1496 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1497 /* just double the memory each time */ 1498 maxadd = umax; 1499 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 1500 umax += maxadd; 1501 1502 /* allocate a longer ju */ 1503 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1504 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 1505 ierr = PetscFree(ju);CHKERRQ(ierr); 1506 ju = jutmp; 1507 1508 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1509 ierr = PetscMemcpy(jutmp,lev,(iu[k])*sizeof(int));CHKERRQ(ierr); 1510 ierr = PetscFree(lev);CHKERRQ(ierr); 1511 lev = jutmp; 1512 realloc += 2; /* count how many times we realloc */ 1513 } 1514 1515 /* save nonzero structure of k-th row in ju */ 1516 ju[juidx] = k; /* diag[k] */ 1517 lev[juidx] = 0; 1518 juidx++; 1519 i = k; 1520 while (nzk --) { 1521 i = q[i]; 1522 ju[juidx] = i; 1523 lev[juidx] = levtmp[i]; 1524 juidx++; 1525 } 1526 } /* end of for (k=0; k<mbs; k++) */ 1527 1528 if (ai[mbs] != 0) { 1529 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1530 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1531 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1532 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 1533 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 1534 } else { 1535 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 1536 } 1537 1538 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1539 ierr = PetscFree(jl);CHKERRQ(ierr); 1540 ierr = PetscFree(lev);CHKERRQ(ierr); 1541 1542 /* put together the new matrix */ 1543 ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr); 1544 /* PetscLogObjectParent(*B,iperm); */ 1545 b = (Mat_SeqSBAIJ*)(*B)->data; 1546 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1547 b->singlemalloc = PETSC_FALSE; 1548 /* the next line frees the default space generated by the Create() */ 1549 ierr = PetscFree(b->a);CHKERRQ(ierr); 1550 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1551 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 1552 b->j = ju; 1553 b->i = iu; 1554 b->diag = 0; 1555 b->ilen = 0; 1556 b->imax = 0; 1557 b->row = perm; 1558 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1559 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1560 b->icol = perm; 1561 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1562 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1563 /* In b structure: Free imax, ilen, old a, old j. 1564 Allocate idnew, solve_work, new a, new j */ 1565 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 1566 b->s_maxnz = b->s_nz = iu[mbs]; 1567 b->factor_damping = info->damping; 1568 b->factor_zeropivot = info->zeropivot; 1569 1570 (*B)->factor = FACTOR_CHOLESKY; 1571 (*B)->info.factor_mallocs = realloc; 1572 (*B)->info.fill_ratio_given = f; 1573 if (ai[mbs] != 0) { 1574 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1575 } else { 1576 (*B)->info.fill_ratio_needed = 0.0; 1577 } 1578 1579 1580 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1581 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1582 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 1583 1584 PetscFunctionReturn(0); 1585 } /* end of if (levels > 0 && perm_identity && bs==1 ) */ 1586 1587 if (!perm_identity) a->permute = PETSC_TRUE; 1588 if (perm_identity){ 1589 ai = a->i; aj = a->j; 1590 } else { /* non-trivial permutation */ 1591 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 1592 ai = a->inew; aj = a->jnew; 1593 } 1594 1595 /* initialization */ 1596 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1597 umax = (int)(f*ai[mbs] + 1); 1598 ierr = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr); 1599 umax += mbs + 1; 1600 shift = mbs + 1; 1601 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 1602 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 1603 iu[0] = mbs + 1; 1604 juidx = mbs + 1; 1605 /* prowl: linked list for pivot row */ 1606 ierr = PetscMalloc((3*mbs+1)*sizeof(int),&prowl);CHKERRQ(ierr); 1607 /* q: linked list for col index */ 1608 q = prowl + mbs; 1609 levtmp = q + mbs; 1610 1611 for (i=0; i<mbs; i++){ 1612 prowl[i] = mbs; 1613 q[i] = 0; 1614 } 1615 1616 /* for each row k */ 1617 for (k=0; k<mbs; k++){ 1618 nzk = 0; 1619 q[k] = mbs; 1620 /* copy current row into linked list */ 1621 nz = ai[rip[k]+1] - ai[rip[k]]; 1622 j = ai[rip[k]]; 1623 while (nz--){ 1624 vj = rip[aj[j++]]; 1625 if (vj > k){ 1626 qm = k; 1627 do { 1628 m = qm; qm = q[m]; 1629 } while(qm < vj); 1630 if (qm == vj) { 1631 SETERRQ(1," error: duplicate entry in A\n"); 1632 } 1633 nzk++; 1634 q[m] = vj; 1635 q[vj] = qm; 1636 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1637 } 1638 } 1639 1640 /* modify nonzero structure of k-th row by computing fill-in 1641 for each row prow to be merged in */ 1642 prow = k; 1643 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 1644 1645 while (prow < k){ 1646 /* merge row prow into k-th row */ 1647 jmin = iu[prow] + 1; 1648 jmax = iu[prow+1]; 1649 qm = k; 1650 for (j=jmin; j<jmax; j++){ 1651 incrlev = lev[j-shift] + 1; 1652 if (incrlev > levels) continue; 1653 1654 vj = ju[j]; 1655 do { 1656 m = qm; qm = q[m]; 1657 } while (qm < vj); 1658 if (qm != vj){ /* a fill */ 1659 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1660 levtmp[vj] = incrlev; 1661 } else { 1662 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1663 } 1664 } 1665 prow = prowl[prow]; /* next pivot row */ 1666 } 1667 1668 /* add k to row list for first nonzero element in k-th row */ 1669 if (nzk > 1){ 1670 i = q[k]; /* col value of first nonzero element in k_th row of U */ 1671 prowl[k] = prowl[i]; prowl[i] = k; 1672 } 1673 iu[k+1] = iu[k] + nzk; 1674 1675 /* allocate more space to ju and lev if needed */ 1676 if (iu[k+1] > umax) { 1677 /* estimate how much additional space we will need */ 1678 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1679 /* just double the memory each time */ 1680 maxadd = umax; 1681 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 1682 umax += maxadd; 1683 1684 /* allocate a longer ju */ 1685 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1686 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 1687 ierr = PetscFree(ju);CHKERRQ(ierr); 1688 ju = jutmp; 1689 1690 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1691 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(int));CHKERRQ(ierr); 1692 ierr = PetscFree(lev);CHKERRQ(ierr); 1693 lev = jutmp; 1694 realloc += 2; /* count how many times we realloc */ 1695 } 1696 1697 /* save nonzero structure of k-th row in ju */ 1698 i=k; 1699 while (nzk --) { 1700 i = q[i]; 1701 ju[juidx] = i; 1702 lev[juidx-shift] = levtmp[i]; 1703 juidx++; 1704 } 1705 } 1706 1707 if (ai[mbs] != 0) { 1708 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1709 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1710 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Run with -pc_icc_fill %g or use \n",af); 1711 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:PCICCSetFill(pc,%g);\n",af); 1712 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:for best performance.\n"); 1713 } else { 1714 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 1715 } 1716 1717 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1718 ierr = PetscFree(prowl);CHKERRQ(ierr); 1719 ierr = PetscFree(lev);CHKERRQ(ierr); 1720 1721 /* put together the new matrix */ 1722 ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr); 1723 /* PetscLogObjectParent(*B,iperm); */ 1724 b = (Mat_SeqSBAIJ*)(*B)->data; 1725 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1726 b->singlemalloc = PETSC_FALSE; 1727 /* the next line frees the default space generated by the Create() */ 1728 ierr = PetscFree(b->a);CHKERRQ(ierr); 1729 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1730 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 1731 b->j = ju; 1732 b->i = iu; 1733 b->diag = 0; 1734 b->ilen = 0; 1735 b->imax = 0; 1736 1737 if (b->row) { 1738 ierr = ISDestroy(b->row);CHKERRQ(ierr); 1739 } 1740 if (b->icol) { 1741 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 1742 } 1743 b->row = perm; 1744 b->icol = perm; 1745 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1746 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1747 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1748 /* In b structure: Free imax, ilen, old a, old j. 1749 Allocate idnew, solve_work, new a, new j */ 1750 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 1751 b->s_maxnz = b->s_nz = iu[mbs]; 1752 1753 (*B)->factor = FACTOR_CHOLESKY; 1754 (*B)->info.factor_mallocs = realloc; 1755 (*B)->info.fill_ratio_given = f; 1756 if (ai[mbs] != 0) { 1757 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1758 } else { 1759 (*B)->info.fill_ratio_needed = 0.0; 1760 } 1761 1762 if (perm_identity){ 1763 switch (bs) { 1764 case 1: 1765 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1766 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1767 (*B)->ops->solves = MatSolves_SeqSBAIJ_1; 1768 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n"); 1769 break; 1770 case 2: 1771 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1772 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1773 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1774 break; 1775 case 3: 1776 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1777 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1778 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 1779 break; 1780 case 4: 1781 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1782 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1783 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1784 break; 1785 case 5: 1786 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1787 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1788 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1789 break; 1790 case 6: 1791 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1792 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1793 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1794 break; 1795 case 7: 1796 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1797 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1798 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1799 break; 1800 default: 1801 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 1802 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 1803 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 1804 break; 1805 } 1806 } 1807 1808 PetscFunctionReturn(0); 1809 } 1810 1811 1812 1813