1 /*$Id: sbaijfact2.c,v 1.2.1.41 2001/08/07 03:03:01 balay Exp $*/ 2 /* 3 Factorization code for SBAIJ format. 4 */ 5 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/ilu.h" 9 #include "src/inline/dot.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_1_NaturalOrdering" 13 int MatSolveTranspose_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 14 { 15 PetscFunctionBegin; 16 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 17 } 18 19 #undef __FUNCT__ 20 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_2_NaturalOrdering" 21 int MatSolveTranspose_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 22 { 23 PetscFunctionBegin; 24 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 25 } 26 27 #undef __FUNCT__ 28 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_3_NaturalOrdering" 29 int MatSolveTranspose_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 30 { 31 PetscFunctionBegin; 32 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 33 } 34 35 #undef __FUNCT__ 36 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_4_NaturalOrdering" 37 int MatSolveTranspose_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 38 { 39 PetscFunctionBegin; 40 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 41 } 42 43 #undef __FUNCT__ 44 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_5_NaturalOrdering" 45 int MatSolveTranspose_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 46 { 47 PetscFunctionBegin; 48 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 49 } 50 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_6_NaturalOrdering" 53 int MatSolveTranspose_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 54 { 55 PetscFunctionBegin; 56 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 57 } 58 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_7_NaturalOrdering" 61 int MatSolveTranspose_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 62 { 63 PetscFunctionBegin; 64 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_1" 69 int MatSolveTranspose_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 70 { 71 PetscFunctionBegin; 72 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 73 } 74 75 #undef __FUNCT__ 76 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_2" 77 int MatSolveTranspose_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 78 { 79 PetscFunctionBegin; 80 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 81 } 82 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_3" 85 int MatSolveTranspose_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 86 { 87 PetscFunctionBegin; 88 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_4" 93 int MatSolveTranspose_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 94 { 95 PetscFunctionBegin; 96 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_5" 101 int MatSolveTranspose_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 102 { 103 PetscFunctionBegin; 104 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_6" 109 int MatSolveTranspose_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 110 { 111 PetscFunctionBegin; 112 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 113 } 114 115 #undef __FUNCT__ 116 #define __FUNCT__ "MatSolveTranspose_SeqSBAIJ_7" 117 int MatSolveTranspose_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 118 { 119 PetscFunctionBegin; 120 SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve()."); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatSolve_SeqSBAIJ_N" 125 int MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 126 { 127 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 128 IS isrow=a->row; 129 int mbs=a->mbs,*ai=a->i,*aj=a->j; 130 int nz,*vj,k,ierr,*r,idx,k1; 131 int bs=a->bs,bs2 = a->bs2; 132 MatScalar *aa=a->a,*v,*diag; 133 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 134 135 PetscFunctionBegin; 136 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 137 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 138 t = a->solve_work; 139 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 140 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 141 142 /* solve U^T * D * y = b by forward substitution */ 143 xk = t; 144 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 145 idx = bs*r[k]; 146 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 147 } 148 for (k=0; k<mbs; k++){ 149 v = aa + bs2*ai[k]; 150 xk = t + k*bs; /* Dk*xk = k-th block of x */ 151 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 152 nz = ai[k+1] - ai[k]; 153 vj = aj + ai[k]; 154 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 155 while (nz--) { 156 /* x(:) += U(k,:)^T*(Dk*xk) */ 157 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 158 vj++; xj = t + (*vj)*bs; 159 v += bs2; 160 } 161 /* xk = inv(Dk)*(Dk*xk) */ 162 diag = aa+k*bs2; /* ptr to inv(Dk) */ 163 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 164 } 165 166 /* solve U*x = y by back substitution */ 167 for (k=mbs-1; k>=0; k--){ 168 v = aa + bs2*ai[k]; 169 xk = t + k*bs; /* xk */ 170 nz = ai[k+1] - ai[k]; 171 vj = aj + ai[k]; 172 xj = t + (*vj)*bs; 173 while (nz--) { 174 /* xk += U(k,:)*x(:) */ 175 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 176 vj++; 177 v += bs2; xj = t + (*vj)*bs; 178 } 179 idx = bs*r[k]; 180 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 181 } 182 183 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 184 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 185 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 186 PetscLogFlops(bs2*(2*a->nz + mbs)); 187 PetscFunctionReturn(0); 188 } 189 190 #undef __FUNCT__ 191 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering" 192 int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 193 { 194 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 195 int mbs=a->mbs,*ai=a->i,*aj=a->j; 196 int nz,*vj,k,ierr; 197 int bs=a->bs,bs2 = a->bs2; 198 MatScalar *aa=a->a,*v,*diag; 199 PetscScalar *x,*xk,*xj,*b,*xk_tmp; 200 201 PetscFunctionBegin; 202 203 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 204 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 205 206 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 207 208 /* solve U^T * D * y = b by forward substitution */ 209 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 210 for (k=0; k<mbs; k++){ 211 v = aa + bs2*ai[k]; 212 xk = x + k*bs; /* Dk*xk = k-th block of x */ 213 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 214 nz = ai[k+1] - ai[k]; 215 vj = aj + ai[k]; 216 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 217 while (nz--) { 218 /* x(:) += U(k,:)^T*(Dk*xk) */ 219 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 220 vj++; xj = x + (*vj)*bs; 221 v += bs2; 222 } 223 /* xk = inv(Dk)*(Dk*xk) */ 224 diag = aa+k*bs2; /* ptr to inv(Dk) */ 225 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 226 } 227 228 /* solve U*x = y by back substitution */ 229 for (k=mbs-1; k>=0; k--){ 230 v = aa + bs2*ai[k]; 231 xk = x + k*bs; /* xk */ 232 nz = ai[k+1] - ai[k]; 233 vj = aj + ai[k]; 234 xj = x + (*vj)*bs; 235 while (nz--) { 236 /* xk += U(k,:)*x(:) */ 237 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 238 vj++; 239 v += bs2; xj = x + (*vj)*bs; 240 } 241 } 242 243 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 244 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 245 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 246 PetscLogFlops(bs2*(2*a->nz + mbs)); 247 PetscFunctionReturn(0); 248 } 249 250 #undef __FUNCT__ 251 #define __FUNCT__ "MatSolve_SeqSBAIJ_7" 252 int MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 253 { 254 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 255 IS isrow=a->row; 256 int mbs=a->mbs,*ai=a->i,*aj=a->j; 257 int nz,*vj,k,ierr,*r,idx; 258 MatScalar *aa=a->a,*v,*d; 259 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 260 261 PetscFunctionBegin; 262 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 263 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 264 t = a->solve_work; 265 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 266 267 /* solve U^T * D * y = b by forward substitution */ 268 tp = t; 269 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 270 idx = 7*r[k]; 271 tp[0] = b[idx]; 272 tp[1] = b[idx+1]; 273 tp[2] = b[idx+2]; 274 tp[3] = b[idx+3]; 275 tp[4] = b[idx+4]; 276 tp[5] = b[idx+5]; 277 tp[6] = b[idx+6]; 278 tp += 7; 279 } 280 281 for (k=0; k<mbs; k++){ 282 v = aa + 49*ai[k]; 283 vj = aj + ai[k]; 284 tp = t + k*7; 285 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 286 nz = ai[k+1] - ai[k]; 287 tp = t + (*vj)*7; 288 while (nz--) { 289 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 290 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 291 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 292 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 293 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 294 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 295 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 296 vj++; tp = t + (*vj)*7; 297 v += 49; 298 } 299 300 /* xk = inv(Dk)*(Dk*xk) */ 301 d = aa+k*49; /* ptr to inv(Dk) */ 302 tp = t + k*7; 303 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 304 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 305 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 306 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 307 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 308 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 309 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 310 } 311 312 /* solve U*x = y by back substitution */ 313 for (k=mbs-1; k>=0; k--){ 314 v = aa + 49*ai[k]; 315 vj = aj + ai[k]; 316 tp = t + k*7; 317 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 318 nz = ai[k+1] - ai[k]; 319 320 tp = t + (*vj)*7; 321 while (nz--) { 322 /* xk += U(k,:)*x(:) */ 323 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 324 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 325 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 326 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 327 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 328 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 329 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 330 vj++; tp = t + (*vj)*7; 331 v += 49; 332 } 333 tp = t + k*7; 334 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 335 idx = 7*r[k]; 336 x[idx] = x0; 337 x[idx+1] = x1; 338 x[idx+2] = x2; 339 x[idx+3] = x3; 340 x[idx+4] = x4; 341 x[idx+5] = x5; 342 x[idx+6] = x6; 343 } 344 345 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 346 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 347 PetscLogFlops(49*(2*a->nz + mbs)); 348 PetscFunctionReturn(0); 349 } 350 351 #undef __FUNCT__ 352 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering" 353 int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 354 { 355 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 356 int mbs=a->mbs,*ai=a->i,*aj=a->j; 357 MatScalar *aa=a->a,*v,*d; 358 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4,x5,x6; 359 int nz,*vj,k,ierr; 360 361 PetscFunctionBegin; 362 363 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 364 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 365 366 /* solve U^T * D * y = b by forward substitution */ 367 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 368 for (k=0; k<mbs; k++){ 369 v = aa + 49*ai[k]; 370 xp = x + k*7; 371 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 372 nz = ai[k+1] - ai[k]; 373 vj = aj + ai[k]; 374 xp = x + (*vj)*7; 375 while (nz--) { 376 /* x(:) += U(k,:)^T*(Dk*xk) */ 377 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 378 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 379 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 380 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 381 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 382 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 383 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 384 vj++; xp = x + (*vj)*7; 385 v += 49; 386 } 387 /* xk = inv(Dk)*(Dk*xk) */ 388 d = aa+k*49; /* ptr to inv(Dk) */ 389 xp = x + k*7; 390 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 391 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 392 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 393 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 394 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 395 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 396 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 397 } 398 399 /* solve U*x = y by back substitution */ 400 for (k=mbs-1; k>=0; k--){ 401 v = aa + 49*ai[k]; 402 xp = x + k*7; 403 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 404 nz = ai[k+1] - ai[k]; 405 vj = aj + ai[k]; 406 xp = x + (*vj)*7; 407 while (nz--) { 408 /* xk += U(k,:)*x(:) */ 409 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 410 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 411 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 412 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 413 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 414 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 415 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 416 vj++; 417 v += 49; xp = x + (*vj)*7; 418 } 419 xp = x + k*7; 420 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 421 } 422 423 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 424 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 425 PetscLogFlops(49*(2*a->nz + mbs)); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatSolve_SeqSBAIJ_6" 431 int MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 432 { 433 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 434 IS isrow=a->row; 435 int mbs=a->mbs,*ai=a->i,*aj=a->j; 436 int nz,*vj,k,ierr,*r,idx; 437 MatScalar *aa=a->a,*v,*d; 438 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 439 440 PetscFunctionBegin; 441 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 442 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 443 t = a->solve_work; 444 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 445 446 /* solve U^T * D * y = b by forward substitution */ 447 tp = t; 448 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 449 idx = 6*r[k]; 450 tp[0] = b[idx]; 451 tp[1] = b[idx+1]; 452 tp[2] = b[idx+2]; 453 tp[3] = b[idx+3]; 454 tp[4] = b[idx+4]; 455 tp[5] = b[idx+5]; 456 tp += 6; 457 } 458 459 for (k=0; k<mbs; k++){ 460 v = aa + 36*ai[k]; 461 vj = aj + ai[k]; 462 tp = t + k*6; 463 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 464 nz = ai[k+1] - ai[k]; 465 tp = t + (*vj)*6; 466 while (nz--) { 467 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 468 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 469 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 470 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 471 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 472 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 473 vj++; tp = t + (*vj)*6; 474 v += 36; 475 } 476 477 /* xk = inv(Dk)*(Dk*xk) */ 478 d = aa+k*36; /* ptr to inv(Dk) */ 479 tp = t + k*6; 480 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 481 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 482 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 483 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 484 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 485 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 486 } 487 488 /* solve U*x = y by back substitution */ 489 for (k=mbs-1; k>=0; k--){ 490 v = aa + 36*ai[k]; 491 vj = aj + ai[k]; 492 tp = t + k*6; 493 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 494 nz = ai[k+1] - ai[k]; 495 496 tp = t + (*vj)*6; 497 while (nz--) { 498 /* xk += U(k,:)*x(:) */ 499 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 500 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 501 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 502 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 503 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 504 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 505 vj++; tp = t + (*vj)*6; 506 v += 36; 507 } 508 tp = t + k*6; 509 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 510 idx = 6*r[k]; 511 x[idx] = x0; 512 x[idx+1] = x1; 513 x[idx+2] = x2; 514 x[idx+3] = x3; 515 x[idx+4] = x4; 516 x[idx+5] = x5; 517 } 518 519 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 520 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 521 PetscLogFlops(36*(2*a->nz + mbs)); 522 PetscFunctionReturn(0); 523 } 524 525 #undef __FUNCT__ 526 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering" 527 int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 528 { 529 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 530 int mbs=a->mbs,*ai=a->i,*aj=a->j; 531 MatScalar *aa=a->a,*v,*d; 532 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4,x5; 533 int nz,*vj,k,ierr; 534 535 PetscFunctionBegin; 536 537 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 538 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 539 540 /* solve U^T * D * y = b by forward substitution */ 541 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 542 for (k=0; k<mbs; k++){ 543 v = aa + 36*ai[k]; 544 xp = x + k*6; 545 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 546 nz = ai[k+1] - ai[k]; 547 vj = aj + ai[k]; 548 xp = x + (*vj)*6; 549 while (nz--) { 550 /* x(:) += U(k,:)^T*(Dk*xk) */ 551 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 552 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 553 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 554 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 555 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 556 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 557 vj++; xp = x + (*vj)*6; 558 v += 36; 559 } 560 /* xk = inv(Dk)*(Dk*xk) */ 561 d = aa+k*36; /* ptr to inv(Dk) */ 562 xp = x + k*6; 563 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 564 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 565 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 566 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 567 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 568 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 569 } 570 571 /* solve U*x = y by back substitution */ 572 for (k=mbs-1; k>=0; k--){ 573 v = aa + 36*ai[k]; 574 xp = x + k*6; 575 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 576 nz = ai[k+1] - ai[k]; 577 vj = aj + ai[k]; 578 xp = x + (*vj)*6; 579 while (nz--) { 580 /* xk += U(k,:)*x(:) */ 581 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 582 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 583 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 584 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 585 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 586 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 587 vj++; 588 v += 36; xp = x + (*vj)*6; 589 } 590 xp = x + k*6; 591 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 592 } 593 594 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 595 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 596 PetscLogFlops(36*(2*a->nz + mbs)); 597 PetscFunctionReturn(0); 598 } 599 600 #undef __FUNCT__ 601 #define __FUNCT__ "MatSolve_SeqSBAIJ_5" 602 int MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 603 { 604 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 605 IS isrow=a->row; 606 int mbs=a->mbs,*ai=a->i,*aj=a->j; 607 int nz,*vj,k,ierr,*r,idx; 608 MatScalar *aa=a->a,*v,*diag; 609 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 610 611 PetscFunctionBegin; 612 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 613 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 614 t = a->solve_work; 615 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 616 617 /* solve U^T * D * y = b by forward substitution */ 618 tp = t; 619 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 620 idx = 5*r[k]; 621 tp[0] = b[idx]; 622 tp[1] = b[idx+1]; 623 tp[2] = b[idx+2]; 624 tp[3] = b[idx+3]; 625 tp[4] = b[idx+4]; 626 tp += 5; 627 } 628 629 for (k=0; k<mbs; k++){ 630 v = aa + 25*ai[k]; 631 vj = aj + ai[k]; 632 tp = t + k*5; 633 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 634 nz = ai[k+1] - ai[k]; 635 636 tp = t + (*vj)*5; 637 while (nz--) { 638 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 639 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 640 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 641 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 642 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 643 vj++; tp = t + (*vj)*5; 644 v += 25; 645 } 646 647 /* xk = inv(Dk)*(Dk*xk) */ 648 diag = aa+k*25; /* ptr to inv(Dk) */ 649 tp = t + k*5; 650 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 651 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 652 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 653 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 654 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 655 } 656 657 /* solve U*x = y by back substitution */ 658 for (k=mbs-1; k>=0; k--){ 659 v = aa + 25*ai[k]; 660 vj = aj + ai[k]; 661 tp = t + k*5; 662 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 663 nz = ai[k+1] - ai[k]; 664 665 tp = t + (*vj)*5; 666 while (nz--) { 667 /* xk += U(k,:)*x(:) */ 668 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 669 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 670 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 671 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 672 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 673 vj++; tp = t + (*vj)*5; 674 v += 25; 675 } 676 tp = t + k*5; 677 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 678 idx = 5*r[k]; 679 x[idx] = x0; 680 x[idx+1] = x1; 681 x[idx+2] = x2; 682 x[idx+3] = x3; 683 x[idx+4] = x4; 684 } 685 686 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 687 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 688 PetscLogFlops(25*(2*a->nz + mbs)); 689 PetscFunctionReturn(0); 690 } 691 692 #undef __FUNCT__ 693 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering" 694 int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 695 { 696 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 697 int mbs=a->mbs,*ai=a->i,*aj=a->j; 698 MatScalar *aa=a->a,*v,*diag; 699 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4; 700 int nz,*vj,k,ierr; 701 702 PetscFunctionBegin; 703 704 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 705 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 706 707 /* solve U^T * D * y = b by forward substitution */ 708 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 709 for (k=0; k<mbs; k++){ 710 v = aa + 25*ai[k]; 711 xp = x + k*5; 712 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 713 nz = ai[k+1] - ai[k]; 714 vj = aj + ai[k]; 715 xp = x + (*vj)*5; 716 while (nz--) { 717 /* x(:) += U(k,:)^T*(Dk*xk) */ 718 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 719 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 720 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 721 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 722 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 723 vj++; xp = x + (*vj)*5; 724 v += 25; 725 } 726 /* xk = inv(Dk)*(Dk*xk) */ 727 diag = aa+k*25; /* ptr to inv(Dk) */ 728 xp = x + k*5; 729 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 730 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 731 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 732 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 733 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 734 } 735 736 /* solve U*x = y by back substitution */ 737 for (k=mbs-1; k>=0; k--){ 738 v = aa + 25*ai[k]; 739 xp = x + k*5; 740 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 741 nz = ai[k+1] - ai[k]; 742 vj = aj + ai[k]; 743 xp = x + (*vj)*5; 744 while (nz--) { 745 /* xk += U(k,:)*x(:) */ 746 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 747 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 748 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 749 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 750 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 751 vj++; 752 v += 25; xp = x + (*vj)*5; 753 } 754 xp = x + k*5; 755 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 756 } 757 758 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 759 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 760 PetscLogFlops(25*(2*a->nz + mbs)); 761 PetscFunctionReturn(0); 762 } 763 764 #undef __FUNCT__ 765 #define __FUNCT__ "MatSolve_SeqSBAIJ_4" 766 int MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 767 { 768 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 769 IS isrow=a->row; 770 int mbs=a->mbs,*ai=a->i,*aj=a->j; 771 int nz,*vj,k,ierr,*r,idx; 772 MatScalar *aa=a->a,*v,*diag; 773 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 774 775 PetscFunctionBegin; 776 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 777 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 778 t = a->solve_work; 779 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 780 781 /* solve U^T * D * y = b by forward substitution */ 782 tp = t; 783 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 784 idx = 4*r[k]; 785 tp[0] = b[idx]; 786 tp[1] = b[idx+1]; 787 tp[2] = b[idx+2]; 788 tp[3] = b[idx+3]; 789 tp += 4; 790 } 791 792 for (k=0; k<mbs; k++){ 793 v = aa + 16*ai[k]; 794 vj = aj + ai[k]; 795 tp = t + k*4; 796 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 797 nz = ai[k+1] - ai[k]; 798 799 tp = t + (*vj)*4; 800 while (nz--) { 801 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 802 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 803 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 804 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 805 vj++; tp = t + (*vj)*4; 806 v += 16; 807 } 808 809 /* xk = inv(Dk)*(Dk*xk) */ 810 diag = aa+k*16; /* ptr to inv(Dk) */ 811 tp = t + k*4; 812 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 813 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 814 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 815 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 816 } 817 818 /* solve U*x = y by back substitution */ 819 for (k=mbs-1; k>=0; k--){ 820 v = aa + 16*ai[k]; 821 vj = aj + ai[k]; 822 tp = t + k*4; 823 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 824 nz = ai[k+1] - ai[k]; 825 826 tp = t + (*vj)*4; 827 while (nz--) { 828 /* xk += U(k,:)*x(:) */ 829 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 830 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 831 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 832 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 833 vj++; tp = t + (*vj)*4; 834 v += 16; 835 } 836 tp = t + k*4; 837 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 838 idx = 4*r[k]; 839 x[idx] = x0; 840 x[idx+1] = x1; 841 x[idx+2] = x2; 842 x[idx+3] = x3; 843 } 844 845 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 846 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 847 PetscLogFlops(16*(2*a->nz + mbs)); 848 PetscFunctionReturn(0); 849 } 850 851 /* 852 Special case where the matrix was factored in the natural ordering. 853 This eliminates the need for the column and row permutation. 854 */ 855 #undef __FUNCT__ 856 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering" 857 int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 858 { 859 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 860 int mbs=a->mbs,*ai=a->i,*aj=a->j; 861 MatScalar *aa=a->a,*v,*diag; 862 PetscScalar *x,*xp,*b,x0,x1,x2,x3; 863 int nz,*vj,k,ierr; 864 865 PetscFunctionBegin; 866 867 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 868 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 869 870 /* solve U^T * D * y = b by forward substitution */ 871 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 872 for (k=0; k<mbs; k++){ 873 v = aa + 16*ai[k]; 874 xp = x + k*4; 875 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 876 nz = ai[k+1] - ai[k]; 877 vj = aj + ai[k]; 878 xp = x + (*vj)*4; 879 while (nz--) { 880 /* x(:) += U(k,:)^T*(Dk*xk) */ 881 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 882 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 883 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 884 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 885 vj++; xp = x + (*vj)*4; 886 v += 16; 887 } 888 /* xk = inv(Dk)*(Dk*xk) */ 889 diag = aa+k*16; /* ptr to inv(Dk) */ 890 xp = x + k*4; 891 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 892 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 893 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 894 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 895 } 896 897 /* solve U*x = y by back substitution */ 898 for (k=mbs-1; k>=0; k--){ 899 v = aa + 16*ai[k]; 900 xp = x + k*4; 901 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 902 nz = ai[k+1] - ai[k]; 903 vj = aj + ai[k]; 904 xp = x + (*vj)*4; 905 while (nz--) { 906 /* xk += U(k,:)*x(:) */ 907 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 908 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 909 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 910 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 911 vj++; 912 v += 16; xp = x + (*vj)*4; 913 } 914 xp = x + k*4; 915 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 916 } 917 918 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 919 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 920 PetscLogFlops(16*(2*a->nz + mbs)); 921 PetscFunctionReturn(0); 922 } 923 924 #undef __FUNCT__ 925 #define __FUNCT__ "MatSolve_SeqSBAIJ_3" 926 int MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 927 { 928 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 929 IS isrow=a->row; 930 int mbs=a->mbs,*ai=a->i,*aj=a->j; 931 int nz,*vj,k,ierr,*r,idx; 932 MatScalar *aa=a->a,*v,*diag; 933 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 934 935 PetscFunctionBegin; 936 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 937 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 938 t = a->solve_work; 939 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 940 941 /* solve U^T * D * y = b by forward substitution */ 942 tp = t; 943 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 944 idx = 3*r[k]; 945 tp[0] = b[idx]; 946 tp[1] = b[idx+1]; 947 tp[2] = b[idx+2]; 948 tp += 3; 949 } 950 951 for (k=0; k<mbs; k++){ 952 v = aa + 9*ai[k]; 953 vj = aj + ai[k]; 954 tp = t + k*3; 955 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 956 nz = ai[k+1] - ai[k]; 957 958 tp = t + (*vj)*3; 959 while (nz--) { 960 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 961 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 962 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 963 vj++; tp = t + (*vj)*3; 964 v += 9; 965 } 966 967 /* xk = inv(Dk)*(Dk*xk) */ 968 diag = aa+k*9; /* ptr to inv(Dk) */ 969 tp = t + k*3; 970 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 971 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 972 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 973 } 974 975 /* solve U*x = y by back substitution */ 976 for (k=mbs-1; k>=0; k--){ 977 v = aa + 9*ai[k]; 978 vj = aj + ai[k]; 979 tp = t + k*3; 980 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 981 nz = ai[k+1] - ai[k]; 982 983 tp = t + (*vj)*3; 984 while (nz--) { 985 /* xk += U(k,:)*x(:) */ 986 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 987 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 988 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 989 vj++; tp = t + (*vj)*3; 990 v += 9; 991 } 992 tp = t + k*3; 993 tp[0] = x0; tp[1] = x1; tp[2] = x2; 994 idx = 3*r[k]; 995 x[idx] = x0; 996 x[idx+1] = x1; 997 x[idx+2] = x2; 998 } 999 1000 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1001 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1002 PetscLogFlops(9*(2*a->nz + mbs)); 1003 PetscFunctionReturn(0); 1004 } 1005 1006 /* 1007 Special case where the matrix was factored in the natural ordering. 1008 This eliminates the need for the column and row permutation. 1009 */ 1010 #undef __FUNCT__ 1011 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering" 1012 int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1013 { 1014 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1015 int mbs=a->mbs,*ai=a->i,*aj=a->j; 1016 MatScalar *aa=a->a,*v,*diag; 1017 PetscScalar *x,*xp,*b,x0,x1,x2; 1018 int nz,*vj,k,ierr; 1019 1020 PetscFunctionBegin; 1021 1022 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1023 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1024 1025 /* solve U^T * D * y = b by forward substitution */ 1026 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1027 for (k=0; k<mbs; k++){ 1028 v = aa + 9*ai[k]; 1029 xp = x + k*3; 1030 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 1031 nz = ai[k+1] - ai[k]; 1032 vj = aj + ai[k]; 1033 xp = x + (*vj)*3; 1034 while (nz--) { 1035 /* x(:) += U(k,:)^T*(Dk*xk) */ 1036 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1037 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1038 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1039 vj++; xp = x + (*vj)*3; 1040 v += 9; 1041 } 1042 /* xk = inv(Dk)*(Dk*xk) */ 1043 diag = aa+k*9; /* ptr to inv(Dk) */ 1044 xp = x + k*3; 1045 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1046 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1047 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1048 } 1049 1050 /* solve U*x = y by back substitution */ 1051 for (k=mbs-1; k>=0; k--){ 1052 v = aa + 9*ai[k]; 1053 xp = x + k*3; 1054 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 1055 nz = ai[k+1] - ai[k]; 1056 vj = aj + ai[k]; 1057 xp = x + (*vj)*3; 1058 while (nz--) { 1059 /* xk += U(k,:)*x(:) */ 1060 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 1061 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 1062 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 1063 vj++; 1064 v += 9; xp = x + (*vj)*3; 1065 } 1066 xp = x + k*3; 1067 xp[0] = x0; xp[1] = x1; xp[2] = x2; 1068 } 1069 1070 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1071 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1072 PetscLogFlops(9*(2*a->nz + mbs)); 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "MatSolve_SeqSBAIJ_2" 1078 int MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 1079 { 1080 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 1081 IS isrow=a->row; 1082 int mbs=a->mbs,*ai=a->i,*aj=a->j; 1083 int nz,*vj,k,k2,ierr,*r,idx; 1084 MatScalar *aa=a->a,*v,*diag; 1085 PetscScalar *x,*b,x0,x1,*t; 1086 1087 PetscFunctionBegin; 1088 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1089 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1090 t = a->solve_work; 1091 /* printf("called MatSolve_SeqSBAIJ_2\n"); */ 1092 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1093 1094 /* solve U^T * D * y = perm(b) by forward substitution */ 1095 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1096 idx = 2*r[k]; 1097 t[k*2] = b[idx]; 1098 t[k*2+1] = b[idx+1]; 1099 } 1100 for (k=0; k<mbs; k++){ 1101 v = aa + 4*ai[k]; 1102 vj = aj + ai[k]; 1103 k2 = k*2; 1104 x0 = t[k2]; x1 = t[k2+1]; 1105 nz = ai[k+1] - ai[k]; 1106 while (nz--) { 1107 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1108 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1109 vj++; v += 4; 1110 } 1111 diag = aa+k*4; /* ptr to inv(Dk) */ 1112 t[k2] = diag[0]*x0 + diag[2]*x1; 1113 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1114 } 1115 1116 /* solve U*x = y by back substitution */ 1117 for (k=mbs-1; k>=0; k--){ 1118 v = aa + 4*ai[k]; 1119 vj = aj + ai[k]; 1120 k2 = k*2; 1121 x0 = t[k2]; x1 = t[k2+1]; 1122 nz = ai[k+1] - ai[k]; 1123 while (nz--) { 1124 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1125 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1126 vj++; v += 4; 1127 } 1128 t[k2] = x0; 1129 t[k2+1] = x1; 1130 idx = 2*r[k]; 1131 x[idx] = x0; 1132 x[idx+1] = x1; 1133 } 1134 1135 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1136 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1137 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1138 PetscLogFlops(4*(2*a->nz + mbs)); 1139 PetscFunctionReturn(0); 1140 } 1141 1142 /* 1143 Special case where the matrix was factored in the natural ordering. 1144 This eliminates the need for the column and row permutation. 1145 */ 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering" 1148 int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1149 { 1150 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1151 int mbs=a->mbs,*ai=a->i,*aj=a->j; 1152 MatScalar *aa=a->a,*v,*diag; 1153 PetscScalar *x,*b,x0,x1; 1154 int nz,*vj,k,k2,ierr; 1155 1156 PetscFunctionBegin; 1157 1158 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1159 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1160 1161 /* solve U^T * D * y = b by forward substitution */ 1162 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1163 for (k=0; k<mbs; k++){ 1164 v = aa + 4*ai[k]; 1165 vj = aj + ai[k]; 1166 k2 = k*2; 1167 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1168 nz = ai[k+1] - ai[k]; 1169 1170 while (nz--) { 1171 /* x(:) += U(k,:)^T*(Dk*xk) */ 1172 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1173 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1174 vj++; v += 4; 1175 } 1176 /* xk = inv(Dk)*(Dk*xk) */ 1177 diag = aa+k*4; /* ptr to inv(Dk) */ 1178 x[k2] = diag[0]*x0 + diag[2]*x1; 1179 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1180 } 1181 1182 /* solve U*x = y by back substitution */ 1183 for (k=mbs-1; k>=0; k--){ 1184 v = aa + 4*ai[k]; 1185 vj = aj + ai[k]; 1186 k2 = k*2; 1187 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1188 nz = ai[k+1] - ai[k]; 1189 while (nz--) { 1190 /* xk += U(k,:)*x(:) */ 1191 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1192 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1193 vj++; v += 4; 1194 } 1195 x[k2] = x0; 1196 x[k2+1] = x1; 1197 } 1198 1199 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1200 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1201 PetscLogFlops(4*(2*a->nz + mbs)); /* bs2*(2*a->nz + mbs) */ 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1207 int MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1208 { 1209 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1210 IS isrow=a->row; 1211 int mbs=a->mbs,*ai=a->i,*aj=a->j,ierr,*rip; 1212 MatScalar *aa=a->a,*v; 1213 PetscScalar *x,*b,xk,*t; 1214 int nz,*vj,k; 1215 1216 PetscFunctionBegin; 1217 if (!mbs) PetscFunctionReturn(0); 1218 1219 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1220 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1221 t = a->solve_work; 1222 1223 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1224 1225 /* solve U^T*D*y = perm(b) by forward substitution */ 1226 for (k=0; k<mbs; k++) t[k] = b[rip[k]]; 1227 for (k=0; k<mbs; k++){ 1228 v = aa + ai[k]; 1229 vj = aj + ai[k]; 1230 xk = t[k]; 1231 nz = ai[k+1] - ai[k]; 1232 while (nz--) t[*vj++] += (*v++) * xk; 1233 t[k] = xk*aa[k]; /* note: aa[k] = 1/D(k) */ 1234 } 1235 1236 /* solve U*x = y by back substitution */ 1237 for (k=mbs-1; k>=0; k--){ 1238 v = aa + ai[k]; 1239 vj = aj + ai[k]; 1240 xk = t[k]; 1241 nz = ai[k+1] - ai[k]; 1242 while (nz--) xk += (*v++) * t[*vj++]; 1243 t[k] = xk; 1244 x[rip[k]] = xk; 1245 } 1246 1247 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1248 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1249 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1250 PetscLogFlops(4*a->nz + A->m); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1256 int MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1257 { 1258 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1259 int ierr; 1260 1261 PetscFunctionBegin; 1262 if (a->bs == 1) { 1263 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1264 } else { 1265 IS isrow=a->row; 1266 int mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i; 1267 MatScalar *aa=a->a,*v; 1268 PetscScalar *x,*b,*t; 1269 int nz,*vj,k,n; 1270 if (bb->n > a->solves_work_n) { 1271 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 1272 ierr = PetscMalloc(bb->n*A->m*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1273 a->solves_work_n = bb->n; 1274 } 1275 n = bb->n; 1276 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1277 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1278 t = a->solves_work; 1279 1280 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1281 1282 /* solve U^T*D*y = perm(b) by forward substitution */ 1283 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */ 1284 for (k=0; k<mbs; k++){ 1285 v = aa + ai[k]; 1286 vj = aj + ai[k]; 1287 nz = ai[k+1] - ai[k]; 1288 while (nz--) { 1289 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1290 v++;vj++; 1291 } 1292 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1293 } 1294 1295 /* solve U*x = y by back substitution */ 1296 for (k=mbs-1; k>=0; k--){ 1297 v = aa + ai[k]; 1298 vj = aj + ai[k]; 1299 nz = ai[k+1] - ai[k]; 1300 while (nz--) { 1301 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1302 v++;vj++; 1303 } 1304 for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i]; 1305 } 1306 1307 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1308 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1309 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1310 PetscLogFlops(bb->n*(4*a->nz + A->m)); 1311 } 1312 PetscFunctionReturn(0); 1313 } 1314 1315 /* 1316 Special case where the matrix was ILU(0) factored in the natural 1317 ordering. This eliminates the need for the column and row permutation. 1318 */ 1319 #undef __FUNCT__ 1320 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1321 int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1322 { 1323 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1324 int mbs=a->mbs,*ai=a->i,*aj=a->j,ierr; 1325 MatScalar *aa=a->a,*v; 1326 PetscScalar *x,*b,xk; 1327 int nz,*vj,k; 1328 1329 PetscFunctionBegin; 1330 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1331 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1332 1333 /* solve U^T*D*y = b by forward substitution */ 1334 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1335 for (k=0; k<mbs; k++){ 1336 v = aa + ai[k] + 1; 1337 vj = aj + ai[k] + 1; 1338 xk = x[k]; 1339 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1340 while (nz--) x[*vj++] += (*v++) * xk; 1341 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1342 } 1343 1344 /* solve U*x = y by back substitution */ 1345 for (k=mbs-2; k>=0; k--){ 1346 v = aa + ai[k] + 1; 1347 vj = aj + ai[k] + 1; 1348 xk = x[k]; 1349 nz = ai[k+1] - ai[k] - 1; 1350 while (nz--) xk += (*v++) * x[*vj++]; 1351 x[k] = xk; 1352 } 1353 1354 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1355 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1356 PetscLogFlops(4*a->nz + A->m); 1357 PetscFunctionReturn(0); 1358 } 1359 1360 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 1361 #undef __FUNCT__ 1362 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 1363 int MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B) 1364 { 1365 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 1366 int *rip,ierr,i,mbs = a->mbs,*ai = a->i,*aj = a->j; 1367 int *jutmp,bs = a->bs,bs2=a->bs2; 1368 int m,realloc = 0,*levtmp; 1369 int *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd,*jl; 1370 int incrlev,*lev,shift,prow,nz; 1371 int *il,ili,nextprow; 1372 PetscReal f = info->fill,levels = info->levels; 1373 PetscTruth perm_identity; 1374 1375 PetscFunctionBegin; 1376 /* check whether perm is the identity mapping */ 1377 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1378 1379 /* special case that simply copies fill pattern */ 1380 if (!levels && perm_identity && bs==1) { 1381 ierr = MatDuplicate_SeqSBAIJ(A,MAT_DO_NOT_COPY_VALUES,B);CHKERRQ(ierr); 1382 (*B)->factor = FACTOR_CHOLESKY; 1383 b = (Mat_SeqSBAIJ*)(*B)->data; 1384 b->row = perm; 1385 b->icol = perm; 1386 b->factor_damping = info->damping; 1387 b->factor_shift = info->shift; 1388 b->factor_zeropivot = info->zeropivot; 1389 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1390 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1391 ierr = PetscMalloc(((*B)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1392 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1393 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1394 PetscFunctionReturn(0); 1395 } 1396 1397 /* --- inplace icc(levels), levels>0, ie, *B has same data structure as A --- */ 1398 if (levels > 0 && perm_identity && bs==1 ){ 1399 if (!perm_identity) a->permute = PETSC_TRUE; 1400 1401 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1402 1403 if (perm_identity){ /* without permutation */ 1404 ai = a->i; aj = a->j; 1405 } else { /* non-trivial permutation */ 1406 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 1407 ai = a->inew; aj = a->jnew; 1408 } 1409 1410 /* initialization */ 1411 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 1412 umax = (int)(f*ai[mbs] + 1); 1413 ierr = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr); 1414 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 1415 iu[0] = 0; 1416 juidx = 0; /* index for ju */ 1417 ierr = PetscMalloc((4*mbs+1)*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */ 1418 q = jl + mbs; /* linked list for col index of active row */ 1419 levtmp = q + mbs; 1420 il = levtmp + mbs; 1421 for (i=0; i<mbs; i++){ 1422 jl[i] = mbs; 1423 q[i] = 0; 1424 il[i] = 0; 1425 } 1426 1427 /* for each row k */ 1428 for (k=0; k<mbs; k++){ 1429 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 1430 q[k] = mbs; 1431 /* initialize nonzero structure of k-th row to row rip[k] of A */ 1432 jmin = ai[rip[k]] +1; /* exclude diag[k] */ 1433 jmax = ai[rip[k]+1]; 1434 for (j=jmin; j<jmax; j++){ 1435 vj = rip[aj[j]]; /* col. value */ 1436 if(vj > k){ 1437 qm = k; 1438 do { 1439 m = qm; qm = q[m]; 1440 } while(qm < vj); 1441 if (qm == vj) { 1442 SETERRQ(1," error: duplicate entry in A\n"); 1443 } 1444 nzk++; 1445 q[m] = vj; 1446 q[vj] = qm; 1447 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1448 } /* if(vj > k) */ 1449 } /* for (j=jmin; j<jmax; j++) */ 1450 1451 /* modify nonzero structure of k-th row by computing fill-in 1452 for each row i to be merged in */ 1453 prow = k; 1454 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 1455 1456 while (prow < k){ 1457 nextprow = jl[prow]; 1458 /* merge row prow into k-th row */ 1459 ili = il[prow]; 1460 jmin = ili + 1; /* points to 2nd nzero entry in U(prow,k:mbs-1) */ 1461 jmax = iu[prow+1]; 1462 qm = k; 1463 for (j=jmin; j<jmax; j++){ 1464 vj = ju[j]; 1465 incrlev = lev[j] + 1; 1466 if (incrlev > levels) continue; 1467 do { 1468 m = qm; qm = q[m]; 1469 } while (qm < vj); 1470 if (qm != vj){ /* a fill */ 1471 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1472 levtmp[vj] = incrlev; 1473 } else { 1474 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1475 } 1476 } 1477 if (jmin < jmax){ /* update il and jl */ 1478 il[prow] = jmin; 1479 j = ju[jmin]; 1480 jl[prow] = jl[j]; jl[j] = prow; 1481 } 1482 prow = nextprow; 1483 } 1484 1485 /* add the first nonzero element in U(k, k+1:mbs-1) to jl */ 1486 if (nzk > 0){ 1487 i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1488 jl[k] = jl[i]; jl[i] = k; 1489 il[k] = iu[k] + 1; 1490 } 1491 iu[k+1] = iu[k] + nzk + 1; /* include diag[k] */ 1492 1493 /* allocate more space to ju if needed */ 1494 if (iu[k+1] > umax) { 1495 /* estimate how much additional space we will need */ 1496 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1497 /* just double the memory each time */ 1498 maxadd = umax; 1499 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 1500 umax += maxadd; 1501 1502 /* allocate a longer ju */ 1503 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1504 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 1505 ierr = PetscFree(ju);CHKERRQ(ierr); 1506 ju = jutmp; 1507 1508 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1509 ierr = PetscMemcpy(jutmp,lev,(iu[k])*sizeof(int));CHKERRQ(ierr); 1510 ierr = PetscFree(lev);CHKERRQ(ierr); 1511 lev = jutmp; 1512 realloc += 2; /* count how many times we realloc */ 1513 } 1514 1515 /* save nonzero structure of k-th row in ju */ 1516 ju[juidx] = k; /* diag[k] */ 1517 lev[juidx] = 0; 1518 juidx++; 1519 i = k; 1520 while (nzk --) { 1521 i = q[i]; 1522 ju[juidx] = i; 1523 lev[juidx] = levtmp[i]; 1524 juidx++; 1525 } 1526 } /* end of for (k=0; k<mbs; k++) */ 1527 1528 if (ai[mbs] != 0) { 1529 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1530 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1531 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1532 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 1533 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 1534 } else { 1535 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 1536 } 1537 1538 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1539 ierr = PetscFree(jl);CHKERRQ(ierr); 1540 ierr = PetscFree(lev);CHKERRQ(ierr); 1541 1542 /* put together the new matrix */ 1543 ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr); 1544 /* PetscLogObjectParent(*B,iperm); */ 1545 b = (Mat_SeqSBAIJ*)(*B)->data; 1546 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1547 b->singlemalloc = PETSC_FALSE; 1548 /* the next line frees the default space generated by the Create() */ 1549 ierr = PetscFree(b->a);CHKERRQ(ierr); 1550 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1551 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 1552 b->j = ju; 1553 b->i = iu; 1554 b->diag = 0; 1555 b->ilen = 0; 1556 b->imax = 0; 1557 b->row = perm; 1558 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1559 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1560 b->icol = perm; 1561 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1562 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1563 /* In b structure: Free imax, ilen, old a, old j. 1564 Allocate idnew, solve_work, new a, new j */ 1565 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 1566 b->maxnz = b->nz = iu[mbs]; 1567 b->factor_damping = info->damping; 1568 b->factor_shift = info->shift; 1569 b->factor_zeropivot = info->zeropivot; 1570 1571 (*B)->factor = FACTOR_CHOLESKY; 1572 (*B)->info.factor_mallocs = realloc; 1573 (*B)->info.fill_ratio_given = f; 1574 if (ai[mbs] != 0) { 1575 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1576 } else { 1577 (*B)->info.fill_ratio_needed = 0.0; 1578 } 1579 1580 1581 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1582 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1583 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 1584 1585 PetscFunctionReturn(0); 1586 } /* end of if (levels > 0 && perm_identity && bs==1 ) */ 1587 1588 if (!perm_identity) a->permute = PETSC_TRUE; 1589 if (perm_identity){ 1590 ai = a->i; aj = a->j; 1591 } else { /* non-trivial permutation */ 1592 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 1593 ai = a->inew; aj = a->jnew; 1594 } 1595 1596 /* initialization */ 1597 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1598 umax = (int)(f*ai[mbs] + 1); 1599 ierr = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr); 1600 umax += mbs + 1; 1601 shift = mbs + 1; 1602 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 1603 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 1604 iu[0] = mbs + 1; 1605 juidx = mbs + 1; 1606 /* prowl: linked list for pivot row */ 1607 ierr = PetscMalloc((3*mbs+1)*sizeof(int),&prowl);CHKERRQ(ierr); 1608 /* q: linked list for col index */ 1609 q = prowl + mbs; 1610 levtmp = q + mbs; 1611 1612 for (i=0; i<mbs; i++){ 1613 prowl[i] = mbs; 1614 q[i] = 0; 1615 } 1616 1617 /* for each row k */ 1618 for (k=0; k<mbs; k++){ 1619 nzk = 0; 1620 q[k] = mbs; 1621 /* copy current row into linked list */ 1622 nz = ai[rip[k]+1] - ai[rip[k]]; 1623 j = ai[rip[k]]; 1624 while (nz--){ 1625 vj = rip[aj[j++]]; 1626 if (vj > k){ 1627 qm = k; 1628 do { 1629 m = qm; qm = q[m]; 1630 } while(qm < vj); 1631 if (qm == vj) { 1632 SETERRQ(1," error: duplicate entry in A\n"); 1633 } 1634 nzk++; 1635 q[m] = vj; 1636 q[vj] = qm; 1637 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1638 } 1639 } 1640 1641 /* modify nonzero structure of k-th row by computing fill-in 1642 for each row prow to be merged in */ 1643 prow = k; 1644 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 1645 1646 while (prow < k){ 1647 /* merge row prow into k-th row */ 1648 jmin = iu[prow] + 1; 1649 jmax = iu[prow+1]; 1650 qm = k; 1651 for (j=jmin; j<jmax; j++){ 1652 incrlev = lev[j-shift] + 1; 1653 if (incrlev > levels) continue; 1654 1655 vj = ju[j]; 1656 do { 1657 m = qm; qm = q[m]; 1658 } while (qm < vj); 1659 if (qm != vj){ /* a fill */ 1660 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1661 levtmp[vj] = incrlev; 1662 } else { 1663 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1664 } 1665 } 1666 prow = prowl[prow]; /* next pivot row */ 1667 } 1668 1669 /* add k to row list for first nonzero element in k-th row */ 1670 if (nzk > 1){ 1671 i = q[k]; /* col value of first nonzero element in k_th row of U */ 1672 prowl[k] = prowl[i]; prowl[i] = k; 1673 } 1674 iu[k+1] = iu[k] + nzk; 1675 1676 /* allocate more space to ju and lev if needed */ 1677 if (iu[k+1] > umax) { 1678 /* estimate how much additional space we will need */ 1679 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1680 /* just double the memory each time */ 1681 maxadd = umax; 1682 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 1683 umax += maxadd; 1684 1685 /* allocate a longer ju */ 1686 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1687 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 1688 ierr = PetscFree(ju);CHKERRQ(ierr); 1689 ju = jutmp; 1690 1691 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1692 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(int));CHKERRQ(ierr); 1693 ierr = PetscFree(lev);CHKERRQ(ierr); 1694 lev = jutmp; 1695 realloc += 2; /* count how many times we realloc */ 1696 } 1697 1698 /* save nonzero structure of k-th row in ju */ 1699 i=k; 1700 while (nzk --) { 1701 i = q[i]; 1702 ju[juidx] = i; 1703 lev[juidx-shift] = levtmp[i]; 1704 juidx++; 1705 } 1706 } 1707 1708 if (ai[mbs] != 0) { 1709 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1710 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1711 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Run with -pc_icc_fill %g or use \n",af); 1712 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:PCICCSetFill(pc,%g);\n",af); 1713 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:for best performance.\n"); 1714 } else { 1715 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 1716 } 1717 1718 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1719 ierr = PetscFree(prowl);CHKERRQ(ierr); 1720 ierr = PetscFree(lev);CHKERRQ(ierr); 1721 1722 /* put together the new matrix */ 1723 ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr); 1724 /* PetscLogObjectParent(*B,iperm); */ 1725 b = (Mat_SeqSBAIJ*)(*B)->data; 1726 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1727 b->singlemalloc = PETSC_FALSE; 1728 /* the next line frees the default space generated by the Create() */ 1729 ierr = PetscFree(b->a);CHKERRQ(ierr); 1730 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1731 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 1732 b->j = ju; 1733 b->i = iu; 1734 b->diag = 0; 1735 b->ilen = 0; 1736 b->imax = 0; 1737 1738 if (b->row) { 1739 ierr = ISDestroy(b->row);CHKERRQ(ierr); 1740 } 1741 if (b->icol) { 1742 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 1743 } 1744 b->row = perm; 1745 b->icol = perm; 1746 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1747 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1748 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1749 /* In b structure: Free imax, ilen, old a, old j. 1750 Allocate idnew, solve_work, new a, new j */ 1751 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 1752 b->maxnz = b->nz = iu[mbs]; 1753 1754 (*B)->factor = FACTOR_CHOLESKY; 1755 (*B)->info.factor_mallocs = realloc; 1756 (*B)->info.fill_ratio_given = f; 1757 if (ai[mbs] != 0) { 1758 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1759 } else { 1760 (*B)->info.fill_ratio_needed = 0.0; 1761 } 1762 1763 if (perm_identity){ 1764 switch (bs) { 1765 case 1: 1766 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1767 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1768 (*B)->ops->solves = MatSolves_SeqSBAIJ_1; 1769 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n"); 1770 break; 1771 case 2: 1772 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1773 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1774 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1775 break; 1776 case 3: 1777 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1778 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1779 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 1780 break; 1781 case 4: 1782 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1783 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1784 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1785 break; 1786 case 5: 1787 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1788 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1789 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1790 break; 1791 case 6: 1792 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1793 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1794 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1795 break; 1796 case 7: 1797 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1798 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1799 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1800 break; 1801 default: 1802 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 1803 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 1804 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 1805 break; 1806 } 1807 } 1808 1809 PetscFunctionReturn(0); 1810 } 1811 1812 1813 1814