1 /* 2 Factorization code for SBAIJ format. 3 */ 4 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 #include "src/mat/impls/baij/seq/baij.h" 7 #include "src/inline/ilu.h" 8 #include "src/inline/dot.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSolve_SeqSBAIJ_N" 12 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 13 { 14 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 15 IS isrow=a->row; 16 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 17 PetscErrorCode ierr; 18 PetscInt nz,*vj,k,*r,idx,k1; 19 PetscInt bs=A->bs,bs2 = a->bs2; 20 MatScalar *aa=a->a,*v,*diag; 21 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 22 23 PetscFunctionBegin; 24 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 25 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 26 t = a->solve_work; 27 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 28 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 29 30 /* solve U^T * D * y = b by forward substitution */ 31 xk = t; 32 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 33 idx = bs*r[k]; 34 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 35 } 36 for (k=0; k<mbs; k++){ 37 v = aa + bs2*ai[k]; 38 xk = t + k*bs; /* Dk*xk = k-th block of x */ 39 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 40 nz = ai[k+1] - ai[k]; 41 vj = aj + ai[k]; 42 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 43 while (nz--) { 44 /* x(:) += U(k,:)^T*(Dk*xk) */ 45 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 46 vj++; xj = t + (*vj)*bs; 47 v += bs2; 48 } 49 /* xk = inv(Dk)*(Dk*xk) */ 50 diag = aa+k*bs2; /* ptr to inv(Dk) */ 51 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 52 } 53 54 /* solve U*x = y by back substitution */ 55 for (k=mbs-1; k>=0; k--){ 56 v = aa + bs2*ai[k]; 57 xk = t + k*bs; /* xk */ 58 nz = ai[k+1] - ai[k]; 59 vj = aj + ai[k]; 60 xj = t + (*vj)*bs; 61 while (nz--) { 62 /* xk += U(k,:)*x(:) */ 63 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 64 vj++; 65 v += bs2; xj = t + (*vj)*bs; 66 } 67 idx = bs*r[k]; 68 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 69 } 70 71 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 72 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 73 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 74 PetscLogFlops(bs2*(2*a->nz + mbs)); 75 PetscFunctionReturn(0); 76 } 77 78 #undef __FUNCT__ 79 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering" 80 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 81 { 82 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 83 PetscErrorCode ierr; 84 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 85 PetscInt nz,*vj,k; 86 PetscInt bs=A->bs,bs2 = a->bs2; 87 MatScalar *aa=a->a,*v,*diag; 88 PetscScalar *x,*xk,*xj,*b,*xk_tmp; 89 90 PetscFunctionBegin; 91 92 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 93 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 94 95 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 96 97 /* solve U^T * D * y = b by forward substitution */ 98 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 99 for (k=0; k<mbs; k++){ 100 v = aa + bs2*ai[k]; 101 xk = x + k*bs; /* Dk*xk = k-th block of x */ 102 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 103 nz = ai[k+1] - ai[k]; 104 vj = aj + ai[k]; 105 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 106 while (nz--) { 107 /* x(:) += U(k,:)^T*(Dk*xk) */ 108 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 109 vj++; xj = x + (*vj)*bs; 110 v += bs2; 111 } 112 /* xk = inv(Dk)*(Dk*xk) */ 113 diag = aa+k*bs2; /* ptr to inv(Dk) */ 114 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 115 } 116 117 /* solve U*x = y by back substitution */ 118 for (k=mbs-1; k>=0; k--){ 119 v = aa + bs2*ai[k]; 120 xk = x + k*bs; /* xk */ 121 nz = ai[k+1] - ai[k]; 122 vj = aj + ai[k]; 123 xj = x + (*vj)*bs; 124 while (nz--) { 125 /* xk += U(k,:)*x(:) */ 126 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 127 vj++; 128 v += bs2; xj = x + (*vj)*bs; 129 } 130 } 131 132 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 133 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 134 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 135 PetscLogFlops(bs2*(2*a->nz + mbs)); 136 PetscFunctionReturn(0); 137 } 138 139 #undef __FUNCT__ 140 #define __FUNCT__ "MatSolve_SeqSBAIJ_7" 141 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 142 { 143 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 144 IS isrow=a->row; 145 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 146 PetscErrorCode ierr; 147 PetscInt nz,*vj,k,*r,idx; 148 MatScalar *aa=a->a,*v,*d; 149 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 150 151 PetscFunctionBegin; 152 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 153 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 154 t = a->solve_work; 155 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 156 157 /* solve U^T * D * y = b by forward substitution */ 158 tp = t; 159 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 160 idx = 7*r[k]; 161 tp[0] = b[idx]; 162 tp[1] = b[idx+1]; 163 tp[2] = b[idx+2]; 164 tp[3] = b[idx+3]; 165 tp[4] = b[idx+4]; 166 tp[5] = b[idx+5]; 167 tp[6] = b[idx+6]; 168 tp += 7; 169 } 170 171 for (k=0; k<mbs; k++){ 172 v = aa + 49*ai[k]; 173 vj = aj + ai[k]; 174 tp = t + k*7; 175 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 176 nz = ai[k+1] - ai[k]; 177 tp = t + (*vj)*7; 178 while (nz--) { 179 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 180 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 181 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 182 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 183 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 184 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 185 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 186 vj++; tp = t + (*vj)*7; 187 v += 49; 188 } 189 190 /* xk = inv(Dk)*(Dk*xk) */ 191 d = aa+k*49; /* ptr to inv(Dk) */ 192 tp = t + k*7; 193 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 194 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 195 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 196 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 197 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 198 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 199 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 200 } 201 202 /* solve U*x = y by back substitution */ 203 for (k=mbs-1; k>=0; k--){ 204 v = aa + 49*ai[k]; 205 vj = aj + ai[k]; 206 tp = t + k*7; 207 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 208 nz = ai[k+1] - ai[k]; 209 210 tp = t + (*vj)*7; 211 while (nz--) { 212 /* xk += U(k,:)*x(:) */ 213 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 214 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 215 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 216 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 217 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 218 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 219 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 220 vj++; tp = t + (*vj)*7; 221 v += 49; 222 } 223 tp = t + k*7; 224 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 225 idx = 7*r[k]; 226 x[idx] = x0; 227 x[idx+1] = x1; 228 x[idx+2] = x2; 229 x[idx+3] = x3; 230 x[idx+4] = x4; 231 x[idx+5] = x5; 232 x[idx+6] = x6; 233 } 234 235 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 236 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 237 PetscLogFlops(49*(2*a->nz + mbs)); 238 PetscFunctionReturn(0); 239 } 240 241 #undef __FUNCT__ 242 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering" 243 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 244 { 245 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 246 PetscErrorCode ierr; 247 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 248 MatScalar *aa=a->a,*v,*d; 249 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4,x5,x6; 250 PetscInt nz,*vj,k; 251 252 PetscFunctionBegin; 253 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 254 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 255 256 /* solve U^T * D * y = b by forward substitution */ 257 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 258 for (k=0; k<mbs; k++){ 259 v = aa + 49*ai[k]; 260 xp = x + k*7; 261 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 262 nz = ai[k+1] - ai[k]; 263 vj = aj + ai[k]; 264 xp = x + (*vj)*7; 265 while (nz--) { 266 /* x(:) += U(k,:)^T*(Dk*xk) */ 267 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 268 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 269 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 270 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 271 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 272 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 273 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 274 vj++; xp = x + (*vj)*7; 275 v += 49; 276 } 277 /* xk = inv(Dk)*(Dk*xk) */ 278 d = aa+k*49; /* ptr to inv(Dk) */ 279 xp = x + k*7; 280 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 281 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 282 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 283 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 284 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 285 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 286 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 287 } 288 289 /* solve U*x = y by back substitution */ 290 for (k=mbs-1; k>=0; k--){ 291 v = aa + 49*ai[k]; 292 xp = x + k*7; 293 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 294 nz = ai[k+1] - ai[k]; 295 vj = aj + ai[k]; 296 xp = x + (*vj)*7; 297 while (nz--) { 298 /* xk += U(k,:)*x(:) */ 299 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 300 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 301 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 302 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 303 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 304 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 305 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 306 vj++; 307 v += 49; xp = x + (*vj)*7; 308 } 309 xp = x + k*7; 310 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 311 } 312 313 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 314 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 315 PetscLogFlops(49*(2*a->nz + mbs)); 316 PetscFunctionReturn(0); 317 } 318 319 #undef __FUNCT__ 320 #define __FUNCT__ "MatSolve_SeqSBAIJ_6" 321 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 322 { 323 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 324 IS isrow=a->row; 325 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 326 PetscErrorCode ierr; 327 PetscInt nz,*vj,k,*r,idx; 328 MatScalar *aa=a->a,*v,*d; 329 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 330 331 PetscFunctionBegin; 332 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 333 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 334 t = a->solve_work; 335 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 336 337 /* solve U^T * D * y = b by forward substitution */ 338 tp = t; 339 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 340 idx = 6*r[k]; 341 tp[0] = b[idx]; 342 tp[1] = b[idx+1]; 343 tp[2] = b[idx+2]; 344 tp[3] = b[idx+3]; 345 tp[4] = b[idx+4]; 346 tp[5] = b[idx+5]; 347 tp += 6; 348 } 349 350 for (k=0; k<mbs; k++){ 351 v = aa + 36*ai[k]; 352 vj = aj + ai[k]; 353 tp = t + k*6; 354 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 355 nz = ai[k+1] - ai[k]; 356 tp = t + (*vj)*6; 357 while (nz--) { 358 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 359 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 360 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 361 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 362 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 363 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 364 vj++; tp = t + (*vj)*6; 365 v += 36; 366 } 367 368 /* xk = inv(Dk)*(Dk*xk) */ 369 d = aa+k*36; /* ptr to inv(Dk) */ 370 tp = t + k*6; 371 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 372 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 373 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 374 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 375 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 376 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 377 } 378 379 /* solve U*x = y by back substitution */ 380 for (k=mbs-1; k>=0; k--){ 381 v = aa + 36*ai[k]; 382 vj = aj + ai[k]; 383 tp = t + k*6; 384 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 385 nz = ai[k+1] - ai[k]; 386 387 tp = t + (*vj)*6; 388 while (nz--) { 389 /* xk += U(k,:)*x(:) */ 390 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 391 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 392 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 393 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 394 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 395 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 396 vj++; tp = t + (*vj)*6; 397 v += 36; 398 } 399 tp = t + k*6; 400 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 401 idx = 6*r[k]; 402 x[idx] = x0; 403 x[idx+1] = x1; 404 x[idx+2] = x2; 405 x[idx+3] = x3; 406 x[idx+4] = x4; 407 x[idx+5] = x5; 408 } 409 410 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 411 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 412 PetscLogFlops(36*(2*a->nz + mbs)); 413 PetscFunctionReturn(0); 414 } 415 416 #undef __FUNCT__ 417 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering" 418 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 419 { 420 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 421 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 422 MatScalar *aa=a->a,*v,*d; 423 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4,x5; 424 PetscErrorCode ierr; 425 PetscInt nz,*vj,k; 426 427 PetscFunctionBegin; 428 429 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 430 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 431 432 /* solve U^T * D * y = b by forward substitution */ 433 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 434 for (k=0; k<mbs; k++){ 435 v = aa + 36*ai[k]; 436 xp = x + k*6; 437 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 438 nz = ai[k+1] - ai[k]; 439 vj = aj + ai[k]; 440 xp = x + (*vj)*6; 441 while (nz--) { 442 /* x(:) += U(k,:)^T*(Dk*xk) */ 443 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 444 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 445 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 446 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 447 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 448 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 449 vj++; xp = x + (*vj)*6; 450 v += 36; 451 } 452 /* xk = inv(Dk)*(Dk*xk) */ 453 d = aa+k*36; /* ptr to inv(Dk) */ 454 xp = x + k*6; 455 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 456 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 457 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 458 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 459 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 460 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 461 } 462 463 /* solve U*x = y by back substitution */ 464 for (k=mbs-1; k>=0; k--){ 465 v = aa + 36*ai[k]; 466 xp = x + k*6; 467 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 468 nz = ai[k+1] - ai[k]; 469 vj = aj + ai[k]; 470 xp = x + (*vj)*6; 471 while (nz--) { 472 /* xk += U(k,:)*x(:) */ 473 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 474 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 475 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 476 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 477 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 478 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 479 vj++; 480 v += 36; xp = x + (*vj)*6; 481 } 482 xp = x + k*6; 483 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 484 } 485 486 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 487 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 488 PetscLogFlops(36*(2*a->nz + mbs)); 489 PetscFunctionReturn(0); 490 } 491 492 #undef __FUNCT__ 493 #define __FUNCT__ "MatSolve_SeqSBAIJ_5" 494 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 495 { 496 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 497 IS isrow=a->row; 498 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 499 PetscErrorCode ierr; 500 PetscInt nz,*vj,k,*r,idx; 501 MatScalar *aa=a->a,*v,*diag; 502 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 503 504 PetscFunctionBegin; 505 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 506 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 507 t = a->solve_work; 508 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 509 510 /* solve U^T * D * y = b by forward substitution */ 511 tp = t; 512 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 513 idx = 5*r[k]; 514 tp[0] = b[idx]; 515 tp[1] = b[idx+1]; 516 tp[2] = b[idx+2]; 517 tp[3] = b[idx+3]; 518 tp[4] = b[idx+4]; 519 tp += 5; 520 } 521 522 for (k=0; k<mbs; k++){ 523 v = aa + 25*ai[k]; 524 vj = aj + ai[k]; 525 tp = t + k*5; 526 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 527 nz = ai[k+1] - ai[k]; 528 529 tp = t + (*vj)*5; 530 while (nz--) { 531 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 532 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 533 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 534 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 535 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 536 vj++; tp = t + (*vj)*5; 537 v += 25; 538 } 539 540 /* xk = inv(Dk)*(Dk*xk) */ 541 diag = aa+k*25; /* ptr to inv(Dk) */ 542 tp = t + k*5; 543 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 544 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 545 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 546 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 547 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 548 } 549 550 /* solve U*x = y by back substitution */ 551 for (k=mbs-1; k>=0; k--){ 552 v = aa + 25*ai[k]; 553 vj = aj + ai[k]; 554 tp = t + k*5; 555 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 556 nz = ai[k+1] - ai[k]; 557 558 tp = t + (*vj)*5; 559 while (nz--) { 560 /* xk += U(k,:)*x(:) */ 561 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 562 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 563 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 564 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 565 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 566 vj++; tp = t + (*vj)*5; 567 v += 25; 568 } 569 tp = t + k*5; 570 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 571 idx = 5*r[k]; 572 x[idx] = x0; 573 x[idx+1] = x1; 574 x[idx+2] = x2; 575 x[idx+3] = x3; 576 x[idx+4] = x4; 577 } 578 579 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 580 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 581 PetscLogFlops(25*(2*a->nz + mbs)); 582 PetscFunctionReturn(0); 583 } 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering" 587 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 588 { 589 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 590 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 591 MatScalar *aa=a->a,*v,*diag; 592 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4; 593 PetscErrorCode ierr; 594 PetscInt nz,*vj,k; 595 596 PetscFunctionBegin; 597 598 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 599 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 600 601 /* solve U^T * D * y = b by forward substitution */ 602 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 603 for (k=0; k<mbs; k++){ 604 v = aa + 25*ai[k]; 605 xp = x + k*5; 606 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 607 nz = ai[k+1] - ai[k]; 608 vj = aj + ai[k]; 609 xp = x + (*vj)*5; 610 while (nz--) { 611 /* x(:) += U(k,:)^T*(Dk*xk) */ 612 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 613 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 614 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 615 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 616 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 617 vj++; xp = x + (*vj)*5; 618 v += 25; 619 } 620 /* xk = inv(Dk)*(Dk*xk) */ 621 diag = aa+k*25; /* ptr to inv(Dk) */ 622 xp = x + k*5; 623 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 624 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 625 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 626 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 627 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 628 } 629 630 /* solve U*x = y by back substitution */ 631 for (k=mbs-1; k>=0; k--){ 632 v = aa + 25*ai[k]; 633 xp = x + k*5; 634 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 635 nz = ai[k+1] - ai[k]; 636 vj = aj + ai[k]; 637 xp = x + (*vj)*5; 638 while (nz--) { 639 /* xk += U(k,:)*x(:) */ 640 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 641 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 642 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 643 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 644 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 645 vj++; 646 v += 25; xp = x + (*vj)*5; 647 } 648 xp = x + k*5; 649 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 650 } 651 652 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 653 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 654 PetscLogFlops(25*(2*a->nz + mbs)); 655 PetscFunctionReturn(0); 656 } 657 658 #undef __FUNCT__ 659 #define __FUNCT__ "MatSolve_SeqSBAIJ_4" 660 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 661 { 662 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 663 IS isrow=a->row; 664 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 665 PetscErrorCode ierr; 666 PetscInt nz,*vj,k,*r,idx; 667 MatScalar *aa=a->a,*v,*diag; 668 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 669 670 PetscFunctionBegin; 671 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 672 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 673 t = a->solve_work; 674 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 675 676 /* solve U^T * D * y = b by forward substitution */ 677 tp = t; 678 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 679 idx = 4*r[k]; 680 tp[0] = b[idx]; 681 tp[1] = b[idx+1]; 682 tp[2] = b[idx+2]; 683 tp[3] = b[idx+3]; 684 tp += 4; 685 } 686 687 for (k=0; k<mbs; k++){ 688 v = aa + 16*ai[k]; 689 vj = aj + ai[k]; 690 tp = t + k*4; 691 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 692 nz = ai[k+1] - ai[k]; 693 694 tp = t + (*vj)*4; 695 while (nz--) { 696 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 697 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 698 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 699 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 700 vj++; tp = t + (*vj)*4; 701 v += 16; 702 } 703 704 /* xk = inv(Dk)*(Dk*xk) */ 705 diag = aa+k*16; /* ptr to inv(Dk) */ 706 tp = t + k*4; 707 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 708 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 709 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 710 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 711 } 712 713 /* solve U*x = y by back substitution */ 714 for (k=mbs-1; k>=0; k--){ 715 v = aa + 16*ai[k]; 716 vj = aj + ai[k]; 717 tp = t + k*4; 718 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 719 nz = ai[k+1] - ai[k]; 720 721 tp = t + (*vj)*4; 722 while (nz--) { 723 /* xk += U(k,:)*x(:) */ 724 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 725 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 726 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 727 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 728 vj++; tp = t + (*vj)*4; 729 v += 16; 730 } 731 tp = t + k*4; 732 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 733 idx = 4*r[k]; 734 x[idx] = x0; 735 x[idx+1] = x1; 736 x[idx+2] = x2; 737 x[idx+3] = x3; 738 } 739 740 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 741 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 742 PetscLogFlops(16*(2*a->nz + mbs)); 743 PetscFunctionReturn(0); 744 } 745 746 /* 747 Special case where the matrix was factored in the natural ordering. 748 This eliminates the need for the column and row permutation. 749 */ 750 #undef __FUNCT__ 751 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering" 752 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 753 { 754 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 755 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 756 MatScalar *aa=a->a,*v,*diag; 757 PetscScalar *x,*xp,*b,x0,x1,x2,x3; 758 PetscErrorCode ierr; 759 PetscInt nz,*vj,k; 760 761 PetscFunctionBegin; 762 763 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 764 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 765 766 /* solve U^T * D * y = b by forward substitution */ 767 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 768 for (k=0; k<mbs; k++){ 769 v = aa + 16*ai[k]; 770 xp = x + k*4; 771 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 772 nz = ai[k+1] - ai[k]; 773 vj = aj + ai[k]; 774 xp = x + (*vj)*4; 775 while (nz--) { 776 /* x(:) += U(k,:)^T*(Dk*xk) */ 777 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 778 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 779 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 780 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 781 vj++; xp = x + (*vj)*4; 782 v += 16; 783 } 784 /* xk = inv(Dk)*(Dk*xk) */ 785 diag = aa+k*16; /* ptr to inv(Dk) */ 786 xp = x + k*4; 787 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 788 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 789 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 790 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 791 } 792 793 /* solve U*x = y by back substitution */ 794 for (k=mbs-1; k>=0; k--){ 795 v = aa + 16*ai[k]; 796 xp = x + k*4; 797 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 798 nz = ai[k+1] - ai[k]; 799 vj = aj + ai[k]; 800 xp = x + (*vj)*4; 801 while (nz--) { 802 /* xk += U(k,:)*x(:) */ 803 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 804 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 805 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 806 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 807 vj++; 808 v += 16; xp = x + (*vj)*4; 809 } 810 xp = x + k*4; 811 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 812 } 813 814 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 815 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 816 PetscLogFlops(16*(2*a->nz + mbs)); 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNCT__ 821 #define __FUNCT__ "MatSolve_SeqSBAIJ_3" 822 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 823 { 824 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 825 IS isrow=a->row; 826 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 827 PetscErrorCode ierr; 828 PetscInt nz,*vj,k,*r,idx; 829 MatScalar *aa=a->a,*v,*diag; 830 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 831 832 PetscFunctionBegin; 833 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 834 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 835 t = a->solve_work; 836 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 837 838 /* solve U^T * D * y = b by forward substitution */ 839 tp = t; 840 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 841 idx = 3*r[k]; 842 tp[0] = b[idx]; 843 tp[1] = b[idx+1]; 844 tp[2] = b[idx+2]; 845 tp += 3; 846 } 847 848 for (k=0; k<mbs; k++){ 849 v = aa + 9*ai[k]; 850 vj = aj + ai[k]; 851 tp = t + k*3; 852 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 853 nz = ai[k+1] - ai[k]; 854 855 tp = t + (*vj)*3; 856 while (nz--) { 857 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 858 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 859 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 860 vj++; tp = t + (*vj)*3; 861 v += 9; 862 } 863 864 /* xk = inv(Dk)*(Dk*xk) */ 865 diag = aa+k*9; /* ptr to inv(Dk) */ 866 tp = t + k*3; 867 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 868 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 869 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 870 } 871 872 /* solve U*x = y by back substitution */ 873 for (k=mbs-1; k>=0; k--){ 874 v = aa + 9*ai[k]; 875 vj = aj + ai[k]; 876 tp = t + k*3; 877 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 878 nz = ai[k+1] - ai[k]; 879 880 tp = t + (*vj)*3; 881 while (nz--) { 882 /* xk += U(k,:)*x(:) */ 883 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 884 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 885 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 886 vj++; tp = t + (*vj)*3; 887 v += 9; 888 } 889 tp = t + k*3; 890 tp[0] = x0; tp[1] = x1; tp[2] = x2; 891 idx = 3*r[k]; 892 x[idx] = x0; 893 x[idx+1] = x1; 894 x[idx+2] = x2; 895 } 896 897 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 898 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 899 PetscLogFlops(9*(2*a->nz + mbs)); 900 PetscFunctionReturn(0); 901 } 902 903 /* 904 Special case where the matrix was factored in the natural ordering. 905 This eliminates the need for the column and row permutation. 906 */ 907 #undef __FUNCT__ 908 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering" 909 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 910 { 911 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 912 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 913 MatScalar *aa=a->a,*v,*diag; 914 PetscScalar *x,*xp,*b,x0,x1,x2; 915 PetscErrorCode ierr; 916 PetscInt nz,*vj,k; 917 918 PetscFunctionBegin; 919 920 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 921 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 922 923 /* solve U^T * D * y = b by forward substitution */ 924 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 925 for (k=0; k<mbs; k++){ 926 v = aa + 9*ai[k]; 927 xp = x + k*3; 928 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 929 nz = ai[k+1] - ai[k]; 930 vj = aj + ai[k]; 931 xp = x + (*vj)*3; 932 while (nz--) { 933 /* x(:) += U(k,:)^T*(Dk*xk) */ 934 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 935 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 936 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 937 vj++; xp = x + (*vj)*3; 938 v += 9; 939 } 940 /* xk = inv(Dk)*(Dk*xk) */ 941 diag = aa+k*9; /* ptr to inv(Dk) */ 942 xp = x + k*3; 943 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 944 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 945 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 946 } 947 948 /* solve U*x = y by back substitution */ 949 for (k=mbs-1; k>=0; k--){ 950 v = aa + 9*ai[k]; 951 xp = x + k*3; 952 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 953 nz = ai[k+1] - ai[k]; 954 vj = aj + ai[k]; 955 xp = x + (*vj)*3; 956 while (nz--) { 957 /* xk += U(k,:)*x(:) */ 958 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 959 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 960 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 961 vj++; 962 v += 9; xp = x + (*vj)*3; 963 } 964 xp = x + k*3; 965 xp[0] = x0; xp[1] = x1; xp[2] = x2; 966 } 967 968 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 969 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 970 PetscLogFlops(9*(2*a->nz + mbs)); 971 PetscFunctionReturn(0); 972 } 973 974 #undef __FUNCT__ 975 #define __FUNCT__ "MatSolve_SeqSBAIJ_2" 976 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 977 { 978 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 979 IS isrow=a->row; 980 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 981 PetscErrorCode ierr; 982 PetscInt nz,*vj,k,k2,*r,idx; 983 MatScalar *aa=a->a,*v,*diag; 984 PetscScalar *x,*b,x0,x1,*t; 985 986 PetscFunctionBegin; 987 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 988 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 989 t = a->solve_work; 990 /* printf("called MatSolve_SeqSBAIJ_2\n"); */ 991 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 992 993 /* solve U^T * D * y = perm(b) by forward substitution */ 994 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 995 idx = 2*r[k]; 996 t[k*2] = b[idx]; 997 t[k*2+1] = b[idx+1]; 998 } 999 for (k=0; k<mbs; k++){ 1000 v = aa + 4*ai[k]; 1001 vj = aj + ai[k]; 1002 k2 = k*2; 1003 x0 = t[k2]; x1 = t[k2+1]; 1004 nz = ai[k+1] - ai[k]; 1005 while (nz--) { 1006 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1007 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1008 vj++; v += 4; 1009 } 1010 diag = aa+k*4; /* ptr to inv(Dk) */ 1011 t[k2] = diag[0]*x0 + diag[2]*x1; 1012 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1013 } 1014 1015 /* solve U*x = y by back substitution */ 1016 for (k=mbs-1; k>=0; k--){ 1017 v = aa + 4*ai[k]; 1018 vj = aj + ai[k]; 1019 k2 = k*2; 1020 x0 = t[k2]; x1 = t[k2+1]; 1021 nz = ai[k+1] - ai[k]; 1022 while (nz--) { 1023 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1024 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1025 vj++; v += 4; 1026 } 1027 t[k2] = x0; 1028 t[k2+1] = x1; 1029 idx = 2*r[k]; 1030 x[idx] = x0; 1031 x[idx+1] = x1; 1032 } 1033 1034 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1035 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1036 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1037 PetscLogFlops(4*(2*a->nz + mbs)); 1038 PetscFunctionReturn(0); 1039 } 1040 1041 /* 1042 Special case where the matrix was factored in the natural ordering. 1043 This eliminates the need for the column and row permutation. 1044 */ 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering" 1047 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1048 { 1049 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1050 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1051 MatScalar *aa=a->a,*v,*diag; 1052 PetscScalar *x,*b,x0,x1; 1053 PetscErrorCode ierr; 1054 PetscInt nz,*vj,k,k2; 1055 1056 PetscFunctionBegin; 1057 1058 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1059 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1060 1061 /* solve U^T * D * y = b by forward substitution */ 1062 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1063 for (k=0; k<mbs; k++){ 1064 v = aa + 4*ai[k]; 1065 vj = aj + ai[k]; 1066 k2 = k*2; 1067 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1068 nz = ai[k+1] - ai[k]; 1069 1070 while (nz--) { 1071 /* x(:) += U(k,:)^T*(Dk*xk) */ 1072 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1073 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1074 vj++; v += 4; 1075 } 1076 /* xk = inv(Dk)*(Dk*xk) */ 1077 diag = aa+k*4; /* ptr to inv(Dk) */ 1078 x[k2] = diag[0]*x0 + diag[2]*x1; 1079 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1080 } 1081 1082 /* solve U*x = y by back substitution */ 1083 for (k=mbs-1; k>=0; k--){ 1084 v = aa + 4*ai[k]; 1085 vj = aj + ai[k]; 1086 k2 = k*2; 1087 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1088 nz = ai[k+1] - ai[k]; 1089 while (nz--) { 1090 /* xk += U(k,:)*x(:) */ 1091 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1092 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1093 vj++; v += 4; 1094 } 1095 x[k2] = x0; 1096 x[k2+1] = x1; 1097 } 1098 1099 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1100 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1101 PetscLogFlops(4*(2*a->nz + mbs)); /* bs2*(2*a->nz + mbs) */ 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNCT__ 1106 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1107 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1108 { 1109 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1110 IS isrow=a->row; 1111 PetscErrorCode ierr; 1112 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rip; 1113 MatScalar *aa=a->a,*v; 1114 PetscScalar *x,*b,xk,*t; 1115 PetscInt nz,*vj,k; 1116 1117 PetscFunctionBegin; 1118 if (!mbs) PetscFunctionReturn(0); 1119 1120 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1121 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1122 t = a->solve_work; 1123 1124 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1125 1126 /* solve U^T*D*y = perm(b) by forward substitution */ 1127 for (k=0; k<mbs; k++) t[k] = b[rip[k]]; 1128 for (k=0; k<mbs; k++){ 1129 v = aa + ai[k]; 1130 vj = aj + ai[k]; 1131 xk = t[k]; 1132 nz = ai[k+1] - ai[k]; 1133 while (nz--) t[*vj++] += (*v++) * xk; 1134 t[k] = xk*aa[k]; /* note: aa[k] = 1/D(k) */ 1135 } 1136 1137 /* solve U*x = y by back substitution */ 1138 for (k=mbs-1; k>=0; k--){ 1139 v = aa + ai[k]; 1140 vj = aj + ai[k]; 1141 xk = t[k]; 1142 nz = ai[k+1] - ai[k]; 1143 while (nz--) xk += (*v++) * t[*vj++]; 1144 t[k] = xk; 1145 x[rip[k]] = xk; 1146 } 1147 1148 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1149 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1150 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1151 PetscLogFlops(4*a->nz + A->m); 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1157 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1158 { 1159 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1160 PetscErrorCode ierr; 1161 1162 PetscFunctionBegin; 1163 if (A->bs == 1) { 1164 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1165 } else { 1166 IS isrow=a->row; 1167 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i; 1168 MatScalar *aa=a->a,*v; 1169 PetscScalar *x,*b,*t; 1170 PetscInt nz,*vj,k,n; 1171 if (bb->n > a->solves_work_n) { 1172 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 1173 ierr = PetscMalloc(bb->n*A->m*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1174 a->solves_work_n = bb->n; 1175 } 1176 n = bb->n; 1177 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1178 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1179 t = a->solves_work; 1180 1181 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1182 1183 /* solve U^T*D*y = perm(b) by forward substitution */ 1184 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */ 1185 for (k=0; k<mbs; k++){ 1186 v = aa + ai[k]; 1187 vj = aj + ai[k]; 1188 nz = ai[k+1] - ai[k]; 1189 while (nz--) { 1190 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1191 v++;vj++; 1192 } 1193 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1194 } 1195 1196 /* solve U*x = y by back substitution */ 1197 for (k=mbs-1; k>=0; k--){ 1198 v = aa + ai[k]; 1199 vj = aj + ai[k]; 1200 nz = ai[k+1] - ai[k]; 1201 while (nz--) { 1202 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1203 v++;vj++; 1204 } 1205 for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i]; 1206 } 1207 1208 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1209 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1210 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1211 PetscLogFlops(bb->n*(4*a->nz + A->m)); 1212 } 1213 PetscFunctionReturn(0); 1214 } 1215 1216 /* 1217 Special case where the matrix was ILU(0) factored in the natural 1218 ordering. This eliminates the need for the column and row permutation. 1219 */ 1220 #undef __FUNCT__ 1221 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1222 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1223 { 1224 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1225 PetscErrorCode ierr; 1226 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1227 MatScalar *aa=a->a,*v; 1228 PetscScalar *x,*b,xk; 1229 PetscInt nz,*vj,k; 1230 1231 PetscFunctionBegin; 1232 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1233 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1234 1235 /* solve U^T*D*y = b by forward substitution */ 1236 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1237 for (k=0; k<mbs; k++){ 1238 v = aa + ai[k] + 1; 1239 vj = aj + ai[k] + 1; 1240 xk = x[k]; 1241 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1242 while (nz--) x[*vj++] += (*v++) * xk; 1243 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1244 } 1245 1246 /* solve U*x = y by back substitution */ 1247 for (k=mbs-2; k>=0; k--){ 1248 v = aa + ai[k] + 1; 1249 vj = aj + ai[k] + 1; 1250 xk = x[k]; 1251 nz = ai[k+1] - ai[k] - 1; 1252 while (nz--) xk += (*v++) * x[*vj++]; 1253 x[k] = xk; 1254 } 1255 1256 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1257 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1258 PetscLogFlops(4*a->nz + A->m); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 1263 #undef __FUNCT__ 1264 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 1265 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B) 1266 { 1267 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 1268 PetscErrorCode ierr; 1269 PetscInt *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j; 1270 PetscInt *jutmp,bs = A->bs,bs2=a->bs2; 1271 PetscInt m,reallocs = 0,*levtmp; 1272 PetscInt *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd,*jl; 1273 PetscInt incrlev,*lev,shift,prow,nz; 1274 PetscInt *il,ili,nextprow; 1275 PetscReal f = info->fill,levels = info->levels; 1276 PetscTruth perm_identity; 1277 1278 PetscFunctionBegin; 1279 /* check whether perm is the identity mapping */ 1280 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1281 1282 /* special case that simply copies fill pattern */ 1283 if (!levels && perm_identity && bs==1) { 1284 ierr = MatDuplicate_SeqSBAIJ(A,MAT_DO_NOT_COPY_VALUES,B);CHKERRQ(ierr); 1285 (*B)->factor = FACTOR_CHOLESKY; 1286 b = (Mat_SeqSBAIJ*)(*B)->data; 1287 b->row = perm; 1288 b->icol = perm; 1289 b->factor_damping = info->damping; 1290 b->factor_shift = info->shift; 1291 b->factor_zeropivot = info->zeropivot; 1292 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1293 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1294 ierr = PetscMalloc(((*B)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1295 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1296 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1297 PetscFunctionReturn(0); 1298 } 1299 1300 /* --- inplace icc(levels), levels>0, ie, *B has same data structure as A --- */ 1301 if (levels > 0 && perm_identity && bs==1 ){ 1302 if (!perm_identity) a->permute = PETSC_TRUE; 1303 1304 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1305 1306 if (perm_identity){ /* without permutation */ 1307 ai = a->i; aj = a->j; 1308 } else { /* non-trivial permutation */ 1309 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 1310 ai = a->inew; aj = a->jnew; 1311 } 1312 1313 /* initialization */ 1314 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 1315 umax = (PetscInt)(f*ai[mbs] + 1); 1316 ierr = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr); 1317 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 1318 iu[0] = 0; 1319 juidx = 0; /* index for ju */ 1320 ierr = PetscMalloc((4*mbs+1)*sizeof(PetscInt),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */ 1321 q = jl + mbs; /* linked list for col index of active row */ 1322 levtmp = q + mbs; 1323 il = levtmp + mbs; 1324 for (i=0; i<mbs; i++){ 1325 jl[i] = mbs; 1326 q[i] = 0; 1327 il[i] = 0; 1328 } 1329 1330 /* for each row k */ 1331 for (k=0; k<mbs; k++){ 1332 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 1333 q[k] = mbs; 1334 /* initialize nonzero structure of k-th row to row rip[k] of A */ 1335 jmin = ai[rip[k]] +1; /* exclude diag[k] */ 1336 jmax = ai[rip[k]+1]; 1337 for (j=jmin; j<jmax; j++){ 1338 vj = rip[aj[j]]; /* col. value */ 1339 if(vj > k){ 1340 qm = k; 1341 do { 1342 m = qm; qm = q[m]; 1343 } while(qm < vj); 1344 if (qm == vj) { 1345 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 1346 } 1347 nzk++; 1348 q[m] = vj; 1349 q[vj] = qm; 1350 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1351 } /* if(vj > k) */ 1352 } /* for (j=jmin; j<jmax; j++) */ 1353 1354 /* modify nonzero structure of k-th row by computing fill-in 1355 for each row i to be merged in */ 1356 prow = k; 1357 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 1358 1359 while (prow < k){ 1360 nextprow = jl[prow]; 1361 /* merge row prow into k-th row */ 1362 ili = il[prow]; 1363 jmin = ili + 1; /* points to 2nd nzero entry in U(prow,k:mbs-1) */ 1364 jmax = iu[prow+1]; 1365 qm = k; 1366 for (j=jmin; j<jmax; j++){ 1367 vj = ju[j]; 1368 incrlev = lev[j] + 1; 1369 if (incrlev > levels) continue; 1370 do { 1371 m = qm; qm = q[m]; 1372 } while (qm < vj); 1373 if (qm != vj){ /* a fill */ 1374 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1375 levtmp[vj] = incrlev; 1376 } else { 1377 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1378 } 1379 } 1380 if (jmin < jmax){ /* update il and jl */ 1381 il[prow] = jmin; 1382 j = ju[jmin]; 1383 jl[prow] = jl[j]; jl[j] = prow; 1384 } 1385 prow = nextprow; 1386 } 1387 1388 /* add the first nonzero element in U(k, k+1:mbs-1) to jl */ 1389 if (nzk > 0){ 1390 i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1391 jl[k] = jl[i]; jl[i] = k; 1392 il[k] = iu[k] + 1; 1393 } 1394 iu[k+1] = iu[k] + nzk + 1; /* include diag[k] */ 1395 1396 /* allocate more space to ju if needed */ 1397 if (iu[k+1] > umax) { 1398 /* estimate how much additional space we will need */ 1399 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1400 /* just double the memory each time */ 1401 maxadd = umax; 1402 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 1403 umax += maxadd; 1404 1405 /* allocate a longer ju */ 1406 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 1407 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 1408 ierr = PetscFree(ju);CHKERRQ(ierr); 1409 ju = jutmp; 1410 1411 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 1412 ierr = PetscMemcpy(jutmp,lev,(iu[k])*sizeof(PetscInt));CHKERRQ(ierr); 1413 ierr = PetscFree(lev);CHKERRQ(ierr); 1414 lev = jutmp; 1415 reallocs += 2; /* count how many times we realloc */ 1416 } 1417 1418 /* save nonzero structure of k-th row in ju */ 1419 ju[juidx] = k; /* diag[k] */ 1420 lev[juidx] = 0; 1421 juidx++; 1422 i = k; 1423 while (nzk --) { 1424 i = q[i]; 1425 ju[juidx] = i; 1426 lev[juidx] = levtmp[i]; 1427 juidx++; 1428 } 1429 } /* end of for (k=0; k<mbs; k++) */ 1430 1431 if (ai[mbs] != 0) { 1432 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1433 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1434 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1435 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 1436 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 1437 } else { 1438 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 1439 } 1440 1441 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1442 ierr = PetscFree(jl);CHKERRQ(ierr); 1443 ierr = PetscFree(lev);CHKERRQ(ierr); 1444 1445 /* put together the new matrix */ 1446 ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr); 1447 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 1448 ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 1449 1450 /* PetscLogObjectParent(*B,iperm); */ 1451 b = (Mat_SeqSBAIJ*)(*B)->data; 1452 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1453 b->singlemalloc = PETSC_FALSE; 1454 /* the next line frees the default space generated by the Create() */ 1455 ierr = PetscFree(b->a);CHKERRQ(ierr); 1456 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1457 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 1458 b->j = ju; 1459 b->i = iu; 1460 b->diag = 0; 1461 b->ilen = 0; 1462 b->imax = 0; 1463 b->row = perm; 1464 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1465 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1466 b->icol = perm; 1467 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1468 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1469 /* In b structure: Free imax, ilen, old a, old j. 1470 Allocate idnew, solve_work, new a, new j */ 1471 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 1472 b->maxnz = b->nz = iu[mbs]; 1473 b->factor_damping = info->damping; 1474 b->factor_shift = info->shift; 1475 b->factor_zeropivot = info->zeropivot; 1476 1477 (*B)->factor = FACTOR_CHOLESKY; 1478 (*B)->info.factor_mallocs = reallocs; 1479 (*B)->info.fill_ratio_given = f; 1480 if (ai[mbs] != 0) { 1481 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1482 } else { 1483 (*B)->info.fill_ratio_needed = 0.0; 1484 } 1485 1486 1487 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1488 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1489 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 1490 1491 PetscFunctionReturn(0); 1492 } /* end of if (levels > 0 && perm_identity && bs==1 ) */ 1493 1494 if (!perm_identity) a->permute = PETSC_TRUE; 1495 if (perm_identity){ 1496 ai = a->i; aj = a->j; 1497 } else { /* non-trivial permutation */ 1498 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 1499 ai = a->inew; aj = a->jnew; 1500 } 1501 1502 /* initialization */ 1503 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1504 umax = (PetscInt)(f*ai[mbs] + 1); 1505 ierr = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr); 1506 umax += mbs + 1; 1507 shift = mbs + 1; 1508 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 1509 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 1510 iu[0] = mbs + 1; 1511 juidx = mbs + 1; 1512 /* prowl: linked list for pivot row */ 1513 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr); 1514 /* q: linked list for col index */ 1515 q = prowl + mbs; 1516 levtmp = q + mbs; 1517 1518 for (i=0; i<mbs; i++){ 1519 prowl[i] = mbs; 1520 q[i] = 0; 1521 } 1522 1523 /* for each row k */ 1524 for (k=0; k<mbs; k++){ 1525 nzk = 0; 1526 q[k] = mbs; 1527 /* copy current row into linked list */ 1528 nz = ai[rip[k]+1] - ai[rip[k]]; 1529 j = ai[rip[k]]; 1530 while (nz--){ 1531 vj = rip[aj[j++]]; 1532 if (vj > k){ 1533 qm = k; 1534 do { 1535 m = qm; qm = q[m]; 1536 } while(qm < vj); 1537 if (qm == vj) { 1538 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 1539 } 1540 nzk++; 1541 q[m] = vj; 1542 q[vj] = qm; 1543 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1544 } 1545 } 1546 1547 /* modify nonzero structure of k-th row by computing fill-in 1548 for each row prow to be merged in */ 1549 prow = k; 1550 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 1551 1552 while (prow < k){ 1553 /* merge row prow into k-th row */ 1554 jmin = iu[prow] + 1; 1555 jmax = iu[prow+1]; 1556 qm = k; 1557 for (j=jmin; j<jmax; j++){ 1558 incrlev = lev[j-shift] + 1; 1559 if (incrlev > levels) continue; 1560 1561 vj = ju[j]; 1562 do { 1563 m = qm; qm = q[m]; 1564 } while (qm < vj); 1565 if (qm != vj){ /* a fill */ 1566 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1567 levtmp[vj] = incrlev; 1568 } else { 1569 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1570 } 1571 } 1572 prow = prowl[prow]; /* next pivot row */ 1573 } 1574 1575 /* add k to row list for first nonzero element in k-th row */ 1576 if (nzk > 1){ 1577 i = q[k]; /* col value of first nonzero element in k_th row of U */ 1578 prowl[k] = prowl[i]; prowl[i] = k; 1579 } 1580 iu[k+1] = iu[k] + nzk; 1581 1582 /* allocate more space to ju and lev if needed */ 1583 if (iu[k+1] > umax) { 1584 /* estimate how much additional space we will need */ 1585 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1586 /* just double the memory each time */ 1587 maxadd = umax; 1588 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 1589 umax += maxadd; 1590 1591 /* allocate a longer ju */ 1592 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 1593 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 1594 ierr = PetscFree(ju);CHKERRQ(ierr); 1595 ju = jutmp; 1596 1597 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 1598 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr); 1599 ierr = PetscFree(lev);CHKERRQ(ierr); 1600 lev = jutmp; 1601 reallocs += 2; /* count how many times we realloc */ 1602 } 1603 1604 /* save nonzero structure of k-th row in ju */ 1605 i=k; 1606 while (nzk --) { 1607 i = q[i]; 1608 ju[juidx] = i; 1609 lev[juidx-shift] = levtmp[i]; 1610 juidx++; 1611 } 1612 } 1613 1614 if (ai[mbs] != 0) { 1615 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1616 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1617 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Run with -pc_icc_fill %g or use \n",af); 1618 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:PCICCSetFill(pc,%g);\n",af); 1619 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:for best performance.\n"); 1620 } else { 1621 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 1622 } 1623 1624 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1625 ierr = PetscFree(prowl);CHKERRQ(ierr); 1626 ierr = PetscFree(lev);CHKERRQ(ierr); 1627 1628 /* put together the new matrix */ 1629 ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr); 1630 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 1631 ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 1632 1633 /* PetscLogObjectParent(*B,iperm); */ 1634 b = (Mat_SeqSBAIJ*)(*B)->data; 1635 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1636 b->singlemalloc = PETSC_FALSE; 1637 /* the next line frees the default space generated by the Create() */ 1638 ierr = PetscFree(b->a);CHKERRQ(ierr); 1639 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1640 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 1641 b->j = ju; 1642 b->i = iu; 1643 b->diag = 0; 1644 b->ilen = 0; 1645 b->imax = 0; 1646 1647 if (b->row) { 1648 ierr = ISDestroy(b->row);CHKERRQ(ierr); 1649 } 1650 if (b->icol) { 1651 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 1652 } 1653 b->row = perm; 1654 b->icol = perm; 1655 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1656 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1657 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1658 /* In b structure: Free imax, ilen, old a, old j. 1659 Allocate idnew, solve_work, new a, new j */ 1660 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 1661 b->maxnz = b->nz = iu[mbs]; 1662 1663 (*B)->factor = FACTOR_CHOLESKY; 1664 (*B)->info.factor_mallocs = reallocs; 1665 (*B)->info.fill_ratio_given = f; 1666 if (ai[mbs] != 0) { 1667 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1668 } else { 1669 (*B)->info.fill_ratio_needed = 0.0; 1670 } 1671 1672 if (perm_identity){ 1673 switch (bs) { 1674 case 1: 1675 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1676 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1677 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1678 (*B)->ops->solves = MatSolves_SeqSBAIJ_1; 1679 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n"); 1680 break; 1681 case 2: 1682 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1683 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1684 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1685 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1686 break; 1687 case 3: 1688 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1689 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1690 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1691 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 1692 break; 1693 case 4: 1694 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1695 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1696 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1697 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1698 break; 1699 case 5: 1700 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1701 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1702 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1703 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1704 break; 1705 case 6: 1706 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1707 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1708 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1709 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1710 break; 1711 case 7: 1712 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1713 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1714 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1715 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1716 break; 1717 default: 1718 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 1719 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 1720 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 1721 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 1722 break; 1723 } 1724 } 1725 1726 PetscFunctionReturn(0); 1727 } 1728 1729 1730 1731