1 /* 2 Factorization code for SBAIJ format. 3 */ 4 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 #include "src/mat/impls/baij/seq/baij.h" 7 #include "src/inline/ilu.h" 8 #include "src/inline/dot.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSolve_SeqSBAIJ_N" 12 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 13 { 14 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 15 IS isrow=a->row; 16 int mbs=a->mbs,*ai=a->i,*aj=a->j; 17 PetscErrorCode ierr; 18 int nz,*vj,k,*r,idx,k1; 19 int bs=a->bs,bs2 = a->bs2; 20 MatScalar *aa=a->a,*v,*diag; 21 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 22 23 PetscFunctionBegin; 24 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 25 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 26 t = a->solve_work; 27 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 28 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 29 30 /* solve U^T * D * y = b by forward substitution */ 31 xk = t; 32 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 33 idx = bs*r[k]; 34 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 35 } 36 for (k=0; k<mbs; k++){ 37 v = aa + bs2*ai[k]; 38 xk = t + k*bs; /* Dk*xk = k-th block of x */ 39 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 40 nz = ai[k+1] - ai[k]; 41 vj = aj + ai[k]; 42 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 43 while (nz--) { 44 /* x(:) += U(k,:)^T*(Dk*xk) */ 45 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 46 vj++; xj = t + (*vj)*bs; 47 v += bs2; 48 } 49 /* xk = inv(Dk)*(Dk*xk) */ 50 diag = aa+k*bs2; /* ptr to inv(Dk) */ 51 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 52 } 53 54 /* solve U*x = y by back substitution */ 55 for (k=mbs-1; k>=0; k--){ 56 v = aa + bs2*ai[k]; 57 xk = t + k*bs; /* xk */ 58 nz = ai[k+1] - ai[k]; 59 vj = aj + ai[k]; 60 xj = t + (*vj)*bs; 61 while (nz--) { 62 /* xk += U(k,:)*x(:) */ 63 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 64 vj++; 65 v += bs2; xj = t + (*vj)*bs; 66 } 67 idx = bs*r[k]; 68 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 69 } 70 71 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 72 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 73 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 74 PetscLogFlops(bs2*(2*a->nz + mbs)); 75 PetscFunctionReturn(0); 76 } 77 78 #undef __FUNCT__ 79 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering" 80 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 81 { 82 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 83 PetscErrorCode ierr; 84 int mbs=a->mbs,*ai=a->i,*aj=a->j; 85 int nz,*vj,k; 86 int bs=a->bs,bs2 = a->bs2; 87 MatScalar *aa=a->a,*v,*diag; 88 PetscScalar *x,*xk,*xj,*b,*xk_tmp; 89 90 PetscFunctionBegin; 91 92 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 93 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 94 95 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 96 97 /* solve U^T * D * y = b by forward substitution */ 98 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 99 for (k=0; k<mbs; k++){ 100 v = aa + bs2*ai[k]; 101 xk = x + k*bs; /* Dk*xk = k-th block of x */ 102 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 103 nz = ai[k+1] - ai[k]; 104 vj = aj + ai[k]; 105 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 106 while (nz--) { 107 /* x(:) += U(k,:)^T*(Dk*xk) */ 108 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 109 vj++; xj = x + (*vj)*bs; 110 v += bs2; 111 } 112 /* xk = inv(Dk)*(Dk*xk) */ 113 diag = aa+k*bs2; /* ptr to inv(Dk) */ 114 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 115 } 116 117 /* solve U*x = y by back substitution */ 118 for (k=mbs-1; k>=0; k--){ 119 v = aa + bs2*ai[k]; 120 xk = x + k*bs; /* xk */ 121 nz = ai[k+1] - ai[k]; 122 vj = aj + ai[k]; 123 xj = x + (*vj)*bs; 124 while (nz--) { 125 /* xk += U(k,:)*x(:) */ 126 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 127 vj++; 128 v += bs2; xj = x + (*vj)*bs; 129 } 130 } 131 132 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 133 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 134 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 135 PetscLogFlops(bs2*(2*a->nz + mbs)); 136 PetscFunctionReturn(0); 137 } 138 139 #undef __FUNCT__ 140 #define __FUNCT__ "MatSolve_SeqSBAIJ_7" 141 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 142 { 143 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 144 IS isrow=a->row; 145 int mbs=a->mbs,*ai=a->i,*aj=a->j; 146 PetscErrorCode ierr; 147 int nz,*vj,k,*r,idx; 148 MatScalar *aa=a->a,*v,*d; 149 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 150 151 PetscFunctionBegin; 152 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 153 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 154 t = a->solve_work; 155 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 156 157 /* solve U^T * D * y = b by forward substitution */ 158 tp = t; 159 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 160 idx = 7*r[k]; 161 tp[0] = b[idx]; 162 tp[1] = b[idx+1]; 163 tp[2] = b[idx+2]; 164 tp[3] = b[idx+3]; 165 tp[4] = b[idx+4]; 166 tp[5] = b[idx+5]; 167 tp[6] = b[idx+6]; 168 tp += 7; 169 } 170 171 for (k=0; k<mbs; k++){ 172 v = aa + 49*ai[k]; 173 vj = aj + ai[k]; 174 tp = t + k*7; 175 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 176 nz = ai[k+1] - ai[k]; 177 tp = t + (*vj)*7; 178 while (nz--) { 179 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 180 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 181 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 182 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 183 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 184 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 185 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 186 vj++; tp = t + (*vj)*7; 187 v += 49; 188 } 189 190 /* xk = inv(Dk)*(Dk*xk) */ 191 d = aa+k*49; /* ptr to inv(Dk) */ 192 tp = t + k*7; 193 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 194 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 195 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 196 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 197 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 198 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 199 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 200 } 201 202 /* solve U*x = y by back substitution */ 203 for (k=mbs-1; k>=0; k--){ 204 v = aa + 49*ai[k]; 205 vj = aj + ai[k]; 206 tp = t + k*7; 207 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 208 nz = ai[k+1] - ai[k]; 209 210 tp = t + (*vj)*7; 211 while (nz--) { 212 /* xk += U(k,:)*x(:) */ 213 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 214 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 215 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 216 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 217 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 218 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 219 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 220 vj++; tp = t + (*vj)*7; 221 v += 49; 222 } 223 tp = t + k*7; 224 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 225 idx = 7*r[k]; 226 x[idx] = x0; 227 x[idx+1] = x1; 228 x[idx+2] = x2; 229 x[idx+3] = x3; 230 x[idx+4] = x4; 231 x[idx+5] = x5; 232 x[idx+6] = x6; 233 } 234 235 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 236 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 237 PetscLogFlops(49*(2*a->nz + mbs)); 238 PetscFunctionReturn(0); 239 } 240 241 #undef __FUNCT__ 242 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering" 243 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 244 { 245 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 246 PetscErrorCode ierr; 247 int mbs=a->mbs,*ai=a->i,*aj=a->j; 248 MatScalar *aa=a->a,*v,*d; 249 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4,x5,x6; 250 int nz,*vj,k; 251 252 PetscFunctionBegin; 253 254 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 255 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 256 257 /* solve U^T * D * y = b by forward substitution */ 258 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 259 for (k=0; k<mbs; k++){ 260 v = aa + 49*ai[k]; 261 xp = x + k*7; 262 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 263 nz = ai[k+1] - ai[k]; 264 vj = aj + ai[k]; 265 xp = x + (*vj)*7; 266 while (nz--) { 267 /* x(:) += U(k,:)^T*(Dk*xk) */ 268 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 269 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 270 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 271 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 272 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 273 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 274 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 275 vj++; xp = x + (*vj)*7; 276 v += 49; 277 } 278 /* xk = inv(Dk)*(Dk*xk) */ 279 d = aa+k*49; /* ptr to inv(Dk) */ 280 xp = x + k*7; 281 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 282 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 283 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 284 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 285 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 286 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 287 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 288 } 289 290 /* solve U*x = y by back substitution */ 291 for (k=mbs-1; k>=0; k--){ 292 v = aa + 49*ai[k]; 293 xp = x + k*7; 294 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 295 nz = ai[k+1] - ai[k]; 296 vj = aj + ai[k]; 297 xp = x + (*vj)*7; 298 while (nz--) { 299 /* xk += U(k,:)*x(:) */ 300 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 301 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 302 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 303 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 304 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 305 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 306 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 307 vj++; 308 v += 49; xp = x + (*vj)*7; 309 } 310 xp = x + k*7; 311 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 312 } 313 314 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 315 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 316 PetscLogFlops(49*(2*a->nz + mbs)); 317 PetscFunctionReturn(0); 318 } 319 320 #undef __FUNCT__ 321 #define __FUNCT__ "MatSolve_SeqSBAIJ_6" 322 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 323 { 324 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 325 IS isrow=a->row; 326 int mbs=a->mbs,*ai=a->i,*aj=a->j; 327 PetscErrorCode ierr; 328 int nz,*vj,k,*r,idx; 329 MatScalar *aa=a->a,*v,*d; 330 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 331 332 PetscFunctionBegin; 333 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 334 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 335 t = a->solve_work; 336 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 337 338 /* solve U^T * D * y = b by forward substitution */ 339 tp = t; 340 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 341 idx = 6*r[k]; 342 tp[0] = b[idx]; 343 tp[1] = b[idx+1]; 344 tp[2] = b[idx+2]; 345 tp[3] = b[idx+3]; 346 tp[4] = b[idx+4]; 347 tp[5] = b[idx+5]; 348 tp += 6; 349 } 350 351 for (k=0; k<mbs; k++){ 352 v = aa + 36*ai[k]; 353 vj = aj + ai[k]; 354 tp = t + k*6; 355 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 356 nz = ai[k+1] - ai[k]; 357 tp = t + (*vj)*6; 358 while (nz--) { 359 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 360 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 361 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 362 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 363 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 364 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 365 vj++; tp = t + (*vj)*6; 366 v += 36; 367 } 368 369 /* xk = inv(Dk)*(Dk*xk) */ 370 d = aa+k*36; /* ptr to inv(Dk) */ 371 tp = t + k*6; 372 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 373 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 374 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 375 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 376 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 377 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 378 } 379 380 /* solve U*x = y by back substitution */ 381 for (k=mbs-1; k>=0; k--){ 382 v = aa + 36*ai[k]; 383 vj = aj + ai[k]; 384 tp = t + k*6; 385 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 386 nz = ai[k+1] - ai[k]; 387 388 tp = t + (*vj)*6; 389 while (nz--) { 390 /* xk += U(k,:)*x(:) */ 391 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 392 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 393 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 394 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 395 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 396 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 397 vj++; tp = t + (*vj)*6; 398 v += 36; 399 } 400 tp = t + k*6; 401 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 402 idx = 6*r[k]; 403 x[idx] = x0; 404 x[idx+1] = x1; 405 x[idx+2] = x2; 406 x[idx+3] = x3; 407 x[idx+4] = x4; 408 x[idx+5] = x5; 409 } 410 411 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 412 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 413 PetscLogFlops(36*(2*a->nz + mbs)); 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering" 419 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 420 { 421 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 422 int mbs=a->mbs,*ai=a->i,*aj=a->j; 423 MatScalar *aa=a->a,*v,*d; 424 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4,x5; 425 PetscErrorCode ierr; 426 int nz,*vj,k; 427 428 PetscFunctionBegin; 429 430 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 431 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 432 433 /* solve U^T * D * y = b by forward substitution */ 434 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 435 for (k=0; k<mbs; k++){ 436 v = aa + 36*ai[k]; 437 xp = x + k*6; 438 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 439 nz = ai[k+1] - ai[k]; 440 vj = aj + ai[k]; 441 xp = x + (*vj)*6; 442 while (nz--) { 443 /* x(:) += U(k,:)^T*(Dk*xk) */ 444 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 445 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 446 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 447 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 448 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 449 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 450 vj++; xp = x + (*vj)*6; 451 v += 36; 452 } 453 /* xk = inv(Dk)*(Dk*xk) */ 454 d = aa+k*36; /* ptr to inv(Dk) */ 455 xp = x + k*6; 456 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 457 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 458 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 459 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 460 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 461 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 462 } 463 464 /* solve U*x = y by back substitution */ 465 for (k=mbs-1; k>=0; k--){ 466 v = aa + 36*ai[k]; 467 xp = x + k*6; 468 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 469 nz = ai[k+1] - ai[k]; 470 vj = aj + ai[k]; 471 xp = x + (*vj)*6; 472 while (nz--) { 473 /* xk += U(k,:)*x(:) */ 474 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 475 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 476 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 477 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 478 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 479 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 480 vj++; 481 v += 36; xp = x + (*vj)*6; 482 } 483 xp = x + k*6; 484 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 485 } 486 487 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 488 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 489 PetscLogFlops(36*(2*a->nz + mbs)); 490 PetscFunctionReturn(0); 491 } 492 493 #undef __FUNCT__ 494 #define __FUNCT__ "MatSolve_SeqSBAIJ_5" 495 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 496 { 497 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 498 IS isrow=a->row; 499 int mbs=a->mbs,*ai=a->i,*aj=a->j; 500 PetscErrorCode ierr; 501 int nz,*vj,k,*r,idx; 502 MatScalar *aa=a->a,*v,*diag; 503 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 504 505 PetscFunctionBegin; 506 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 507 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 508 t = a->solve_work; 509 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 510 511 /* solve U^T * D * y = b by forward substitution */ 512 tp = t; 513 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 514 idx = 5*r[k]; 515 tp[0] = b[idx]; 516 tp[1] = b[idx+1]; 517 tp[2] = b[idx+2]; 518 tp[3] = b[idx+3]; 519 tp[4] = b[idx+4]; 520 tp += 5; 521 } 522 523 for (k=0; k<mbs; k++){ 524 v = aa + 25*ai[k]; 525 vj = aj + ai[k]; 526 tp = t + k*5; 527 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 528 nz = ai[k+1] - ai[k]; 529 530 tp = t + (*vj)*5; 531 while (nz--) { 532 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 533 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 534 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 535 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 536 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 537 vj++; tp = t + (*vj)*5; 538 v += 25; 539 } 540 541 /* xk = inv(Dk)*(Dk*xk) */ 542 diag = aa+k*25; /* ptr to inv(Dk) */ 543 tp = t + k*5; 544 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 545 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 546 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 547 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 548 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 549 } 550 551 /* solve U*x = y by back substitution */ 552 for (k=mbs-1; k>=0; k--){ 553 v = aa + 25*ai[k]; 554 vj = aj + ai[k]; 555 tp = t + k*5; 556 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 557 nz = ai[k+1] - ai[k]; 558 559 tp = t + (*vj)*5; 560 while (nz--) { 561 /* xk += U(k,:)*x(:) */ 562 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 563 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 564 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 565 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 566 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 567 vj++; tp = t + (*vj)*5; 568 v += 25; 569 } 570 tp = t + k*5; 571 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 572 idx = 5*r[k]; 573 x[idx] = x0; 574 x[idx+1] = x1; 575 x[idx+2] = x2; 576 x[idx+3] = x3; 577 x[idx+4] = x4; 578 } 579 580 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 581 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 582 PetscLogFlops(25*(2*a->nz + mbs)); 583 PetscFunctionReturn(0); 584 } 585 586 #undef __FUNCT__ 587 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering" 588 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 589 { 590 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 591 int mbs=a->mbs,*ai=a->i,*aj=a->j; 592 MatScalar *aa=a->a,*v,*diag; 593 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4; 594 PetscErrorCode ierr; 595 int nz,*vj,k; 596 597 PetscFunctionBegin; 598 599 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 600 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 601 602 /* solve U^T * D * y = b by forward substitution */ 603 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 604 for (k=0; k<mbs; k++){ 605 v = aa + 25*ai[k]; 606 xp = x + k*5; 607 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 608 nz = ai[k+1] - ai[k]; 609 vj = aj + ai[k]; 610 xp = x + (*vj)*5; 611 while (nz--) { 612 /* x(:) += U(k,:)^T*(Dk*xk) */ 613 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 614 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 615 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 616 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 617 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 618 vj++; xp = x + (*vj)*5; 619 v += 25; 620 } 621 /* xk = inv(Dk)*(Dk*xk) */ 622 diag = aa+k*25; /* ptr to inv(Dk) */ 623 xp = x + k*5; 624 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 625 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 626 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 627 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 628 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 629 } 630 631 /* solve U*x = y by back substitution */ 632 for (k=mbs-1; k>=0; k--){ 633 v = aa + 25*ai[k]; 634 xp = x + k*5; 635 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 636 nz = ai[k+1] - ai[k]; 637 vj = aj + ai[k]; 638 xp = x + (*vj)*5; 639 while (nz--) { 640 /* xk += U(k,:)*x(:) */ 641 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 642 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 643 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 644 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 645 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 646 vj++; 647 v += 25; xp = x + (*vj)*5; 648 } 649 xp = x + k*5; 650 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 651 } 652 653 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 654 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 655 PetscLogFlops(25*(2*a->nz + mbs)); 656 PetscFunctionReturn(0); 657 } 658 659 #undef __FUNCT__ 660 #define __FUNCT__ "MatSolve_SeqSBAIJ_4" 661 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 662 { 663 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 664 IS isrow=a->row; 665 int mbs=a->mbs,*ai=a->i,*aj=a->j; 666 PetscErrorCode ierr; 667 int nz,*vj,k,*r,idx; 668 MatScalar *aa=a->a,*v,*diag; 669 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 670 671 PetscFunctionBegin; 672 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 673 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 674 t = a->solve_work; 675 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 676 677 /* solve U^T * D * y = b by forward substitution */ 678 tp = t; 679 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 680 idx = 4*r[k]; 681 tp[0] = b[idx]; 682 tp[1] = b[idx+1]; 683 tp[2] = b[idx+2]; 684 tp[3] = b[idx+3]; 685 tp += 4; 686 } 687 688 for (k=0; k<mbs; k++){ 689 v = aa + 16*ai[k]; 690 vj = aj + ai[k]; 691 tp = t + k*4; 692 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 693 nz = ai[k+1] - ai[k]; 694 695 tp = t + (*vj)*4; 696 while (nz--) { 697 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 698 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 699 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 700 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 701 vj++; tp = t + (*vj)*4; 702 v += 16; 703 } 704 705 /* xk = inv(Dk)*(Dk*xk) */ 706 diag = aa+k*16; /* ptr to inv(Dk) */ 707 tp = t + k*4; 708 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 709 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 710 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 711 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 712 } 713 714 /* solve U*x = y by back substitution */ 715 for (k=mbs-1; k>=0; k--){ 716 v = aa + 16*ai[k]; 717 vj = aj + ai[k]; 718 tp = t + k*4; 719 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 720 nz = ai[k+1] - ai[k]; 721 722 tp = t + (*vj)*4; 723 while (nz--) { 724 /* xk += U(k,:)*x(:) */ 725 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 726 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 727 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 728 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 729 vj++; tp = t + (*vj)*4; 730 v += 16; 731 } 732 tp = t + k*4; 733 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 734 idx = 4*r[k]; 735 x[idx] = x0; 736 x[idx+1] = x1; 737 x[idx+2] = x2; 738 x[idx+3] = x3; 739 } 740 741 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 742 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 743 PetscLogFlops(16*(2*a->nz + mbs)); 744 PetscFunctionReturn(0); 745 } 746 747 /* 748 Special case where the matrix was factored in the natural ordering. 749 This eliminates the need for the column and row permutation. 750 */ 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering" 753 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 754 { 755 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 756 int mbs=a->mbs,*ai=a->i,*aj=a->j; 757 MatScalar *aa=a->a,*v,*diag; 758 PetscScalar *x,*xp,*b,x0,x1,x2,x3; 759 PetscErrorCode ierr; 760 int nz,*vj,k; 761 762 PetscFunctionBegin; 763 764 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 765 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 766 767 /* solve U^T * D * y = b by forward substitution */ 768 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 769 for (k=0; k<mbs; k++){ 770 v = aa + 16*ai[k]; 771 xp = x + k*4; 772 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 773 nz = ai[k+1] - ai[k]; 774 vj = aj + ai[k]; 775 xp = x + (*vj)*4; 776 while (nz--) { 777 /* x(:) += U(k,:)^T*(Dk*xk) */ 778 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 779 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 780 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 781 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 782 vj++; xp = x + (*vj)*4; 783 v += 16; 784 } 785 /* xk = inv(Dk)*(Dk*xk) */ 786 diag = aa+k*16; /* ptr to inv(Dk) */ 787 xp = x + k*4; 788 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 789 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 790 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 791 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 792 } 793 794 /* solve U*x = y by back substitution */ 795 for (k=mbs-1; k>=0; k--){ 796 v = aa + 16*ai[k]; 797 xp = x + k*4; 798 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 799 nz = ai[k+1] - ai[k]; 800 vj = aj + ai[k]; 801 xp = x + (*vj)*4; 802 while (nz--) { 803 /* xk += U(k,:)*x(:) */ 804 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 805 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 806 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 807 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 808 vj++; 809 v += 16; xp = x + (*vj)*4; 810 } 811 xp = x + k*4; 812 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 813 } 814 815 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 816 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 817 PetscLogFlops(16*(2*a->nz + mbs)); 818 PetscFunctionReturn(0); 819 } 820 821 #undef __FUNCT__ 822 #define __FUNCT__ "MatSolve_SeqSBAIJ_3" 823 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 824 { 825 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 826 IS isrow=a->row; 827 int mbs=a->mbs,*ai=a->i,*aj=a->j; 828 PetscErrorCode ierr; 829 int nz,*vj,k,*r,idx; 830 MatScalar *aa=a->a,*v,*diag; 831 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 832 833 PetscFunctionBegin; 834 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 835 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 836 t = a->solve_work; 837 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 838 839 /* solve U^T * D * y = b by forward substitution */ 840 tp = t; 841 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 842 idx = 3*r[k]; 843 tp[0] = b[idx]; 844 tp[1] = b[idx+1]; 845 tp[2] = b[idx+2]; 846 tp += 3; 847 } 848 849 for (k=0; k<mbs; k++){ 850 v = aa + 9*ai[k]; 851 vj = aj + ai[k]; 852 tp = t + k*3; 853 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 854 nz = ai[k+1] - ai[k]; 855 856 tp = t + (*vj)*3; 857 while (nz--) { 858 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 859 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 860 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 861 vj++; tp = t + (*vj)*3; 862 v += 9; 863 } 864 865 /* xk = inv(Dk)*(Dk*xk) */ 866 diag = aa+k*9; /* ptr to inv(Dk) */ 867 tp = t + k*3; 868 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 869 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 870 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 871 } 872 873 /* solve U*x = y by back substitution */ 874 for (k=mbs-1; k>=0; k--){ 875 v = aa + 9*ai[k]; 876 vj = aj + ai[k]; 877 tp = t + k*3; 878 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 879 nz = ai[k+1] - ai[k]; 880 881 tp = t + (*vj)*3; 882 while (nz--) { 883 /* xk += U(k,:)*x(:) */ 884 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 885 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 886 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 887 vj++; tp = t + (*vj)*3; 888 v += 9; 889 } 890 tp = t + k*3; 891 tp[0] = x0; tp[1] = x1; tp[2] = x2; 892 idx = 3*r[k]; 893 x[idx] = x0; 894 x[idx+1] = x1; 895 x[idx+2] = x2; 896 } 897 898 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 899 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 900 PetscLogFlops(9*(2*a->nz + mbs)); 901 PetscFunctionReturn(0); 902 } 903 904 /* 905 Special case where the matrix was factored in the natural ordering. 906 This eliminates the need for the column and row permutation. 907 */ 908 #undef __FUNCT__ 909 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering" 910 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 911 { 912 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 913 int mbs=a->mbs,*ai=a->i,*aj=a->j; 914 MatScalar *aa=a->a,*v,*diag; 915 PetscScalar *x,*xp,*b,x0,x1,x2; 916 PetscErrorCode ierr; 917 int nz,*vj,k; 918 919 PetscFunctionBegin; 920 921 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 922 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 923 924 /* solve U^T * D * y = b by forward substitution */ 925 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 926 for (k=0; k<mbs; k++){ 927 v = aa + 9*ai[k]; 928 xp = x + k*3; 929 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 930 nz = ai[k+1] - ai[k]; 931 vj = aj + ai[k]; 932 xp = x + (*vj)*3; 933 while (nz--) { 934 /* x(:) += U(k,:)^T*(Dk*xk) */ 935 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 936 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 937 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 938 vj++; xp = x + (*vj)*3; 939 v += 9; 940 } 941 /* xk = inv(Dk)*(Dk*xk) */ 942 diag = aa+k*9; /* ptr to inv(Dk) */ 943 xp = x + k*3; 944 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 945 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 946 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 947 } 948 949 /* solve U*x = y by back substitution */ 950 for (k=mbs-1; k>=0; k--){ 951 v = aa + 9*ai[k]; 952 xp = x + k*3; 953 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 954 nz = ai[k+1] - ai[k]; 955 vj = aj + ai[k]; 956 xp = x + (*vj)*3; 957 while (nz--) { 958 /* xk += U(k,:)*x(:) */ 959 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 960 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 961 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 962 vj++; 963 v += 9; xp = x + (*vj)*3; 964 } 965 xp = x + k*3; 966 xp[0] = x0; xp[1] = x1; xp[2] = x2; 967 } 968 969 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 970 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 971 PetscLogFlops(9*(2*a->nz + mbs)); 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatSolve_SeqSBAIJ_2" 977 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 978 { 979 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 980 IS isrow=a->row; 981 int mbs=a->mbs,*ai=a->i,*aj=a->j; 982 PetscErrorCode ierr; 983 int nz,*vj,k,k2,*r,idx; 984 MatScalar *aa=a->a,*v,*diag; 985 PetscScalar *x,*b,x0,x1,*t; 986 987 PetscFunctionBegin; 988 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 989 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 990 t = a->solve_work; 991 /* printf("called MatSolve_SeqSBAIJ_2\n"); */ 992 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 993 994 /* solve U^T * D * y = perm(b) by forward substitution */ 995 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 996 idx = 2*r[k]; 997 t[k*2] = b[idx]; 998 t[k*2+1] = b[idx+1]; 999 } 1000 for (k=0; k<mbs; k++){ 1001 v = aa + 4*ai[k]; 1002 vj = aj + ai[k]; 1003 k2 = k*2; 1004 x0 = t[k2]; x1 = t[k2+1]; 1005 nz = ai[k+1] - ai[k]; 1006 while (nz--) { 1007 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1008 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1009 vj++; v += 4; 1010 } 1011 diag = aa+k*4; /* ptr to inv(Dk) */ 1012 t[k2] = diag[0]*x0 + diag[2]*x1; 1013 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1014 } 1015 1016 /* solve U*x = y by back substitution */ 1017 for (k=mbs-1; k>=0; k--){ 1018 v = aa + 4*ai[k]; 1019 vj = aj + ai[k]; 1020 k2 = k*2; 1021 x0 = t[k2]; x1 = t[k2+1]; 1022 nz = ai[k+1] - ai[k]; 1023 while (nz--) { 1024 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1025 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1026 vj++; v += 4; 1027 } 1028 t[k2] = x0; 1029 t[k2+1] = x1; 1030 idx = 2*r[k]; 1031 x[idx] = x0; 1032 x[idx+1] = x1; 1033 } 1034 1035 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1036 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1037 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1038 PetscLogFlops(4*(2*a->nz + mbs)); 1039 PetscFunctionReturn(0); 1040 } 1041 1042 /* 1043 Special case where the matrix was factored in the natural ordering. 1044 This eliminates the need for the column and row permutation. 1045 */ 1046 #undef __FUNCT__ 1047 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering" 1048 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1049 { 1050 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1051 int mbs=a->mbs,*ai=a->i,*aj=a->j; 1052 MatScalar *aa=a->a,*v,*diag; 1053 PetscScalar *x,*b,x0,x1; 1054 PetscErrorCode ierr; 1055 int nz,*vj,k,k2; 1056 1057 PetscFunctionBegin; 1058 1059 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1060 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1061 1062 /* solve U^T * D * y = b by forward substitution */ 1063 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1064 for (k=0; k<mbs; k++){ 1065 v = aa + 4*ai[k]; 1066 vj = aj + ai[k]; 1067 k2 = k*2; 1068 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1069 nz = ai[k+1] - ai[k]; 1070 1071 while (nz--) { 1072 /* x(:) += U(k,:)^T*(Dk*xk) */ 1073 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1074 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1075 vj++; v += 4; 1076 } 1077 /* xk = inv(Dk)*(Dk*xk) */ 1078 diag = aa+k*4; /* ptr to inv(Dk) */ 1079 x[k2] = diag[0]*x0 + diag[2]*x1; 1080 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1081 } 1082 1083 /* solve U*x = y by back substitution */ 1084 for (k=mbs-1; k>=0; k--){ 1085 v = aa + 4*ai[k]; 1086 vj = aj + ai[k]; 1087 k2 = k*2; 1088 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1089 nz = ai[k+1] - ai[k]; 1090 while (nz--) { 1091 /* xk += U(k,:)*x(:) */ 1092 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1093 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1094 vj++; v += 4; 1095 } 1096 x[k2] = x0; 1097 x[k2+1] = x1; 1098 } 1099 1100 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1101 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1102 PetscLogFlops(4*(2*a->nz + mbs)); /* bs2*(2*a->nz + mbs) */ 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1108 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1109 { 1110 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1111 IS isrow=a->row; 1112 PetscErrorCode ierr; 1113 int mbs=a->mbs,*ai=a->i,*aj=a->j,*rip; 1114 MatScalar *aa=a->a,*v; 1115 PetscScalar *x,*b,xk,*t; 1116 int nz,*vj,k; 1117 1118 PetscFunctionBegin; 1119 if (!mbs) PetscFunctionReturn(0); 1120 1121 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1122 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1123 t = a->solve_work; 1124 1125 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1126 1127 /* solve U^T*D*y = perm(b) by forward substitution */ 1128 for (k=0; k<mbs; k++) t[k] = b[rip[k]]; 1129 for (k=0; k<mbs; k++){ 1130 v = aa + ai[k]; 1131 vj = aj + ai[k]; 1132 xk = t[k]; 1133 nz = ai[k+1] - ai[k]; 1134 while (nz--) t[*vj++] += (*v++) * xk; 1135 t[k] = xk*aa[k]; /* note: aa[k] = 1/D(k) */ 1136 } 1137 1138 /* solve U*x = y by back substitution */ 1139 for (k=mbs-1; k>=0; k--){ 1140 v = aa + ai[k]; 1141 vj = aj + ai[k]; 1142 xk = t[k]; 1143 nz = ai[k+1] - ai[k]; 1144 while (nz--) xk += (*v++) * t[*vj++]; 1145 t[k] = xk; 1146 x[rip[k]] = xk; 1147 } 1148 1149 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1150 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1151 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1152 PetscLogFlops(4*a->nz + A->m); 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1158 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1159 { 1160 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1161 PetscErrorCode ierr; 1162 1163 PetscFunctionBegin; 1164 if (a->bs == 1) { 1165 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1166 } else { 1167 IS isrow=a->row; 1168 int mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i; 1169 MatScalar *aa=a->a,*v; 1170 PetscScalar *x,*b,*t; 1171 int nz,*vj,k,n; 1172 if (bb->n > a->solves_work_n) { 1173 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 1174 ierr = PetscMalloc(bb->n*A->m*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1175 a->solves_work_n = bb->n; 1176 } 1177 n = bb->n; 1178 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1179 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1180 t = a->solves_work; 1181 1182 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1183 1184 /* solve U^T*D*y = perm(b) by forward substitution */ 1185 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */ 1186 for (k=0; k<mbs; k++){ 1187 v = aa + ai[k]; 1188 vj = aj + ai[k]; 1189 nz = ai[k+1] - ai[k]; 1190 while (nz--) { 1191 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1192 v++;vj++; 1193 } 1194 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1195 } 1196 1197 /* solve U*x = y by back substitution */ 1198 for (k=mbs-1; k>=0; k--){ 1199 v = aa + ai[k]; 1200 vj = aj + ai[k]; 1201 nz = ai[k+1] - ai[k]; 1202 while (nz--) { 1203 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1204 v++;vj++; 1205 } 1206 for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i]; 1207 } 1208 1209 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1210 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1211 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1212 PetscLogFlops(bb->n*(4*a->nz + A->m)); 1213 } 1214 PetscFunctionReturn(0); 1215 } 1216 1217 /* 1218 Special case where the matrix was ILU(0) factored in the natural 1219 ordering. This eliminates the need for the column and row permutation. 1220 */ 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1223 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1224 { 1225 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1226 PetscErrorCode ierr; 1227 int mbs=a->mbs,*ai=a->i,*aj=a->j; 1228 MatScalar *aa=a->a,*v; 1229 PetscScalar *x,*b,xk; 1230 int nz,*vj,k; 1231 1232 PetscFunctionBegin; 1233 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1234 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1235 1236 /* solve U^T*D*y = b by forward substitution */ 1237 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1238 for (k=0; k<mbs; k++){ 1239 v = aa + ai[k] + 1; 1240 vj = aj + ai[k] + 1; 1241 xk = x[k]; 1242 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1243 while (nz--) x[*vj++] += (*v++) * xk; 1244 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1245 } 1246 1247 /* solve U*x = y by back substitution */ 1248 for (k=mbs-2; k>=0; k--){ 1249 v = aa + ai[k] + 1; 1250 vj = aj + ai[k] + 1; 1251 xk = x[k]; 1252 nz = ai[k+1] - ai[k] - 1; 1253 while (nz--) xk += (*v++) * x[*vj++]; 1254 x[k] = xk; 1255 } 1256 1257 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1258 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1259 PetscLogFlops(4*a->nz + A->m); 1260 PetscFunctionReturn(0); 1261 } 1262 1263 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 1264 #undef __FUNCT__ 1265 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 1266 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B) 1267 { 1268 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 1269 PetscErrorCode ierr; 1270 int *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j; 1271 int *jutmp,bs = a->bs,bs2=a->bs2; 1272 int m,realloc = 0,*levtmp; 1273 int *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd,*jl; 1274 int incrlev,*lev,shift,prow,nz; 1275 int *il,ili,nextprow; 1276 PetscReal f = info->fill,levels = info->levels; 1277 PetscTruth perm_identity; 1278 1279 PetscFunctionBegin; 1280 /* check whether perm is the identity mapping */ 1281 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1282 1283 /* special case that simply copies fill pattern */ 1284 if (!levels && perm_identity && bs==1) { 1285 ierr = MatDuplicate_SeqSBAIJ(A,MAT_DO_NOT_COPY_VALUES,B);CHKERRQ(ierr); 1286 (*B)->factor = FACTOR_CHOLESKY; 1287 b = (Mat_SeqSBAIJ*)(*B)->data; 1288 b->row = perm; 1289 b->icol = perm; 1290 b->factor_damping = info->damping; 1291 b->factor_shift = info->shift; 1292 b->factor_zeropivot = info->zeropivot; 1293 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1294 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1295 ierr = PetscMalloc(((*B)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1296 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1297 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1298 PetscFunctionReturn(0); 1299 } 1300 1301 /* --- inplace icc(levels), levels>0, ie, *B has same data structure as A --- */ 1302 if (levels > 0 && perm_identity && bs==1 ){ 1303 if (!perm_identity) a->permute = PETSC_TRUE; 1304 1305 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1306 1307 if (perm_identity){ /* without permutation */ 1308 ai = a->i; aj = a->j; 1309 } else { /* non-trivial permutation */ 1310 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 1311 ai = a->inew; aj = a->jnew; 1312 } 1313 1314 /* initialization */ 1315 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 1316 umax = (int)(f*ai[mbs] + 1); 1317 ierr = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr); 1318 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 1319 iu[0] = 0; 1320 juidx = 0; /* index for ju */ 1321 ierr = PetscMalloc((4*mbs+1)*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */ 1322 q = jl + mbs; /* linked list for col index of active row */ 1323 levtmp = q + mbs; 1324 il = levtmp + mbs; 1325 for (i=0; i<mbs; i++){ 1326 jl[i] = mbs; 1327 q[i] = 0; 1328 il[i] = 0; 1329 } 1330 1331 /* for each row k */ 1332 for (k=0; k<mbs; k++){ 1333 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 1334 q[k] = mbs; 1335 /* initialize nonzero structure of k-th row to row rip[k] of A */ 1336 jmin = ai[rip[k]] +1; /* exclude diag[k] */ 1337 jmax = ai[rip[k]+1]; 1338 for (j=jmin; j<jmax; j++){ 1339 vj = rip[aj[j]]; /* col. value */ 1340 if(vj > k){ 1341 qm = k; 1342 do { 1343 m = qm; qm = q[m]; 1344 } while(qm < vj); 1345 if (qm == vj) { 1346 SETERRQ(1," error: duplicate entry in A\n"); 1347 } 1348 nzk++; 1349 q[m] = vj; 1350 q[vj] = qm; 1351 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1352 } /* if(vj > k) */ 1353 } /* for (j=jmin; j<jmax; j++) */ 1354 1355 /* modify nonzero structure of k-th row by computing fill-in 1356 for each row i to be merged in */ 1357 prow = k; 1358 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 1359 1360 while (prow < k){ 1361 nextprow = jl[prow]; 1362 /* merge row prow into k-th row */ 1363 ili = il[prow]; 1364 jmin = ili + 1; /* points to 2nd nzero entry in U(prow,k:mbs-1) */ 1365 jmax = iu[prow+1]; 1366 qm = k; 1367 for (j=jmin; j<jmax; j++){ 1368 vj = ju[j]; 1369 incrlev = lev[j] + 1; 1370 if (incrlev > levels) continue; 1371 do { 1372 m = qm; qm = q[m]; 1373 } while (qm < vj); 1374 if (qm != vj){ /* a fill */ 1375 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1376 levtmp[vj] = incrlev; 1377 } else { 1378 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1379 } 1380 } 1381 if (jmin < jmax){ /* update il and jl */ 1382 il[prow] = jmin; 1383 j = ju[jmin]; 1384 jl[prow] = jl[j]; jl[j] = prow; 1385 } 1386 prow = nextprow; 1387 } 1388 1389 /* add the first nonzero element in U(k, k+1:mbs-1) to jl */ 1390 if (nzk > 0){ 1391 i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1392 jl[k] = jl[i]; jl[i] = k; 1393 il[k] = iu[k] + 1; 1394 } 1395 iu[k+1] = iu[k] + nzk + 1; /* include diag[k] */ 1396 1397 /* allocate more space to ju if needed */ 1398 if (iu[k+1] > umax) { 1399 /* estimate how much additional space we will need */ 1400 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1401 /* just double the memory each time */ 1402 maxadd = umax; 1403 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 1404 umax += maxadd; 1405 1406 /* allocate a longer ju */ 1407 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1408 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 1409 ierr = PetscFree(ju);CHKERRQ(ierr); 1410 ju = jutmp; 1411 1412 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1413 ierr = PetscMemcpy(jutmp,lev,(iu[k])*sizeof(int));CHKERRQ(ierr); 1414 ierr = PetscFree(lev);CHKERRQ(ierr); 1415 lev = jutmp; 1416 realloc += 2; /* count how many times we realloc */ 1417 } 1418 1419 /* save nonzero structure of k-th row in ju */ 1420 ju[juidx] = k; /* diag[k] */ 1421 lev[juidx] = 0; 1422 juidx++; 1423 i = k; 1424 while (nzk --) { 1425 i = q[i]; 1426 ju[juidx] = i; 1427 lev[juidx] = levtmp[i]; 1428 juidx++; 1429 } 1430 } /* end of for (k=0; k<mbs; k++) */ 1431 1432 if (ai[mbs] != 0) { 1433 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1434 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1435 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1436 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 1437 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 1438 } else { 1439 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 1440 } 1441 1442 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1443 ierr = PetscFree(jl);CHKERRQ(ierr); 1444 ierr = PetscFree(lev);CHKERRQ(ierr); 1445 1446 /* put together the new matrix */ 1447 ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr); 1448 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 1449 ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 1450 1451 /* PetscLogObjectParent(*B,iperm); */ 1452 b = (Mat_SeqSBAIJ*)(*B)->data; 1453 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1454 b->singlemalloc = PETSC_FALSE; 1455 /* the next line frees the default space generated by the Create() */ 1456 ierr = PetscFree(b->a);CHKERRQ(ierr); 1457 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1458 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 1459 b->j = ju; 1460 b->i = iu; 1461 b->diag = 0; 1462 b->ilen = 0; 1463 b->imax = 0; 1464 b->row = perm; 1465 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1466 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1467 b->icol = perm; 1468 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1469 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1470 /* In b structure: Free imax, ilen, old a, old j. 1471 Allocate idnew, solve_work, new a, new j */ 1472 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 1473 b->maxnz = b->nz = iu[mbs]; 1474 b->factor_damping = info->damping; 1475 b->factor_shift = info->shift; 1476 b->factor_zeropivot = info->zeropivot; 1477 1478 (*B)->factor = FACTOR_CHOLESKY; 1479 (*B)->info.factor_mallocs = realloc; 1480 (*B)->info.fill_ratio_given = f; 1481 if (ai[mbs] != 0) { 1482 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1483 } else { 1484 (*B)->info.fill_ratio_needed = 0.0; 1485 } 1486 1487 1488 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1489 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1490 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 1491 1492 PetscFunctionReturn(0); 1493 } /* end of if (levels > 0 && perm_identity && bs==1 ) */ 1494 1495 if (!perm_identity) a->permute = PETSC_TRUE; 1496 if (perm_identity){ 1497 ai = a->i; aj = a->j; 1498 } else { /* non-trivial permutation */ 1499 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 1500 ai = a->inew; aj = a->jnew; 1501 } 1502 1503 /* initialization */ 1504 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1505 umax = (int)(f*ai[mbs] + 1); 1506 ierr = PetscMalloc(umax*sizeof(int),&lev);CHKERRQ(ierr); 1507 umax += mbs + 1; 1508 shift = mbs + 1; 1509 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 1510 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 1511 iu[0] = mbs + 1; 1512 juidx = mbs + 1; 1513 /* prowl: linked list for pivot row */ 1514 ierr = PetscMalloc((3*mbs+1)*sizeof(int),&prowl);CHKERRQ(ierr); 1515 /* q: linked list for col index */ 1516 q = prowl + mbs; 1517 levtmp = q + mbs; 1518 1519 for (i=0; i<mbs; i++){ 1520 prowl[i] = mbs; 1521 q[i] = 0; 1522 } 1523 1524 /* for each row k */ 1525 for (k=0; k<mbs; k++){ 1526 nzk = 0; 1527 q[k] = mbs; 1528 /* copy current row into linked list */ 1529 nz = ai[rip[k]+1] - ai[rip[k]]; 1530 j = ai[rip[k]]; 1531 while (nz--){ 1532 vj = rip[aj[j++]]; 1533 if (vj > k){ 1534 qm = k; 1535 do { 1536 m = qm; qm = q[m]; 1537 } while(qm < vj); 1538 if (qm == vj) { 1539 SETERRQ(1," error: duplicate entry in A\n"); 1540 } 1541 nzk++; 1542 q[m] = vj; 1543 q[vj] = qm; 1544 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1545 } 1546 } 1547 1548 /* modify nonzero structure of k-th row by computing fill-in 1549 for each row prow to be merged in */ 1550 prow = k; 1551 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 1552 1553 while (prow < k){ 1554 /* merge row prow into k-th row */ 1555 jmin = iu[prow] + 1; 1556 jmax = iu[prow+1]; 1557 qm = k; 1558 for (j=jmin; j<jmax; j++){ 1559 incrlev = lev[j-shift] + 1; 1560 if (incrlev > levels) continue; 1561 1562 vj = ju[j]; 1563 do { 1564 m = qm; qm = q[m]; 1565 } while (qm < vj); 1566 if (qm != vj){ /* a fill */ 1567 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1568 levtmp[vj] = incrlev; 1569 } else { 1570 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1571 } 1572 } 1573 prow = prowl[prow]; /* next pivot row */ 1574 } 1575 1576 /* add k to row list for first nonzero element in k-th row */ 1577 if (nzk > 1){ 1578 i = q[k]; /* col value of first nonzero element in k_th row of U */ 1579 prowl[k] = prowl[i]; prowl[i] = k; 1580 } 1581 iu[k+1] = iu[k] + nzk; 1582 1583 /* allocate more space to ju and lev if needed */ 1584 if (iu[k+1] > umax) { 1585 /* estimate how much additional space we will need */ 1586 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1587 /* just double the memory each time */ 1588 maxadd = umax; 1589 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 1590 umax += maxadd; 1591 1592 /* allocate a longer ju */ 1593 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1594 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 1595 ierr = PetscFree(ju);CHKERRQ(ierr); 1596 ju = jutmp; 1597 1598 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 1599 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(int));CHKERRQ(ierr); 1600 ierr = PetscFree(lev);CHKERRQ(ierr); 1601 lev = jutmp; 1602 realloc += 2; /* count how many times we realloc */ 1603 } 1604 1605 /* save nonzero structure of k-th row in ju */ 1606 i=k; 1607 while (nzk --) { 1608 i = q[i]; 1609 ju[juidx] = i; 1610 lev[juidx-shift] = levtmp[i]; 1611 juidx++; 1612 } 1613 } 1614 1615 if (ai[mbs] != 0) { 1616 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1617 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1618 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Run with -pc_icc_fill %g or use \n",af); 1619 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:PCICCSetFill(pc,%g);\n",af); 1620 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:for best performance.\n"); 1621 } else { 1622 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 1623 } 1624 1625 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1626 ierr = PetscFree(prowl);CHKERRQ(ierr); 1627 ierr = PetscFree(lev);CHKERRQ(ierr); 1628 1629 /* put together the new matrix */ 1630 ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr); 1631 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 1632 ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 1633 1634 /* PetscLogObjectParent(*B,iperm); */ 1635 b = (Mat_SeqSBAIJ*)(*B)->data; 1636 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1637 b->singlemalloc = PETSC_FALSE; 1638 /* the next line frees the default space generated by the Create() */ 1639 ierr = PetscFree(b->a);CHKERRQ(ierr); 1640 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1641 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 1642 b->j = ju; 1643 b->i = iu; 1644 b->diag = 0; 1645 b->ilen = 0; 1646 b->imax = 0; 1647 1648 if (b->row) { 1649 ierr = ISDestroy(b->row);CHKERRQ(ierr); 1650 } 1651 if (b->icol) { 1652 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 1653 } 1654 b->row = perm; 1655 b->icol = perm; 1656 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1657 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1658 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1659 /* In b structure: Free imax, ilen, old a, old j. 1660 Allocate idnew, solve_work, new a, new j */ 1661 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 1662 b->maxnz = b->nz = iu[mbs]; 1663 1664 (*B)->factor = FACTOR_CHOLESKY; 1665 (*B)->info.factor_mallocs = realloc; 1666 (*B)->info.fill_ratio_given = f; 1667 if (ai[mbs] != 0) { 1668 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1669 } else { 1670 (*B)->info.fill_ratio_needed = 0.0; 1671 } 1672 1673 if (perm_identity){ 1674 switch (bs) { 1675 case 1: 1676 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1677 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1678 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1679 (*B)->ops->solves = MatSolves_SeqSBAIJ_1; 1680 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n"); 1681 break; 1682 case 2: 1683 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1684 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1685 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1686 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1687 break; 1688 case 3: 1689 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1690 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1691 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1692 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 1693 break; 1694 case 4: 1695 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1696 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1697 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1698 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1699 break; 1700 case 5: 1701 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1702 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1703 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1704 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1705 break; 1706 case 6: 1707 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1708 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1709 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1710 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1711 break; 1712 case 7: 1713 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1714 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1715 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1716 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1717 break; 1718 default: 1719 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 1720 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 1721 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 1722 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 1723 break; 1724 } 1725 } 1726 1727 PetscFunctionReturn(0); 1728 } 1729 1730 1731 1732