1 /* 2 Factorization code for SBAIJ format. 3 */ 4 5 #include "src/mat/impls/sbaij/seq/sbaij.h" 6 #include "src/mat/impls/baij/seq/baij.h" 7 #include "src/inline/ilu.h" 8 #include "src/inline/dot.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSolve_SeqSBAIJ_N" 12 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 13 { 14 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 15 IS isrow=a->row; 16 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 17 PetscErrorCode ierr; 18 PetscInt nz,*vj,k,*r,idx,k1; 19 PetscInt bs=A->bs,bs2 = a->bs2; 20 MatScalar *aa=a->a,*v,*diag; 21 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 22 23 PetscFunctionBegin; 24 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 25 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 26 t = a->solve_work; 27 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 28 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 29 30 /* solve U^T * D * y = b by forward substitution */ 31 xk = t; 32 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 33 idx = bs*r[k]; 34 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 35 } 36 for (k=0; k<mbs; k++){ 37 v = aa + bs2*ai[k]; 38 xk = t + k*bs; /* Dk*xk = k-th block of x */ 39 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 40 nz = ai[k+1] - ai[k]; 41 vj = aj + ai[k]; 42 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 43 while (nz--) { 44 /* x(:) += U(k,:)^T*(Dk*xk) */ 45 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 46 vj++; xj = t + (*vj)*bs; 47 v += bs2; 48 } 49 /* xk = inv(Dk)*(Dk*xk) */ 50 diag = aa+k*bs2; /* ptr to inv(Dk) */ 51 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 52 } 53 54 /* solve U*x = y by back substitution */ 55 for (k=mbs-1; k>=0; k--){ 56 v = aa + bs2*ai[k]; 57 xk = t + k*bs; /* xk */ 58 nz = ai[k+1] - ai[k]; 59 vj = aj + ai[k]; 60 xj = t + (*vj)*bs; 61 while (nz--) { 62 /* xk += U(k,:)*x(:) */ 63 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 64 vj++; 65 v += bs2; xj = t + (*vj)*bs; 66 } 67 idx = bs*r[k]; 68 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 69 } 70 71 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 72 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 73 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 74 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 75 PetscLogFlops(bs2*(2*a->nz + mbs)); 76 PetscFunctionReturn(0); 77 } 78 79 #undef __FUNCT__ 80 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering" 81 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 82 { 83 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 84 PetscErrorCode ierr; 85 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 86 PetscInt nz,*vj,k; 87 PetscInt bs=A->bs,bs2 = a->bs2; 88 MatScalar *aa=a->a,*v,*diag; 89 PetscScalar *x,*xk,*xj,*b,*xk_tmp; 90 91 PetscFunctionBegin; 92 93 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 94 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 95 96 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 97 98 /* solve U^T * D * y = b by forward substitution */ 99 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 100 for (k=0; k<mbs; k++){ 101 v = aa + bs2*ai[k]; 102 xk = x + k*bs; /* Dk*xk = k-th block of x */ 103 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 104 nz = ai[k+1] - ai[k]; 105 vj = aj + ai[k]; 106 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 107 while (nz--) { 108 /* x(:) += U(k,:)^T*(Dk*xk) */ 109 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 110 vj++; xj = x + (*vj)*bs; 111 v += bs2; 112 } 113 /* xk = inv(Dk)*(Dk*xk) */ 114 diag = aa+k*bs2; /* ptr to inv(Dk) */ 115 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 116 } 117 118 /* solve U*x = y by back substitution */ 119 for (k=mbs-1; k>=0; k--){ 120 v = aa + bs2*ai[k]; 121 xk = x + k*bs; /* xk */ 122 nz = ai[k+1] - ai[k]; 123 vj = aj + ai[k]; 124 xj = x + (*vj)*bs; 125 while (nz--) { 126 /* xk += U(k,:)*x(:) */ 127 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 128 vj++; 129 v += bs2; xj = x + (*vj)*bs; 130 } 131 } 132 133 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 134 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 135 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 136 PetscLogFlops(bs2*(2*a->nz + mbs)); 137 PetscFunctionReturn(0); 138 } 139 140 #undef __FUNCT__ 141 #define __FUNCT__ "MatSolve_SeqSBAIJ_7" 142 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 143 { 144 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 145 IS isrow=a->row; 146 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 147 PetscErrorCode ierr; 148 PetscInt nz,*vj,k,*r,idx; 149 MatScalar *aa=a->a,*v,*d; 150 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 151 152 PetscFunctionBegin; 153 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 154 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 155 t = a->solve_work; 156 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 157 158 /* solve U^T * D * y = b by forward substitution */ 159 tp = t; 160 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 161 idx = 7*r[k]; 162 tp[0] = b[idx]; 163 tp[1] = b[idx+1]; 164 tp[2] = b[idx+2]; 165 tp[3] = b[idx+3]; 166 tp[4] = b[idx+4]; 167 tp[5] = b[idx+5]; 168 tp[6] = b[idx+6]; 169 tp += 7; 170 } 171 172 for (k=0; k<mbs; k++){ 173 v = aa + 49*ai[k]; 174 vj = aj + ai[k]; 175 tp = t + k*7; 176 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 177 nz = ai[k+1] - ai[k]; 178 tp = t + (*vj)*7; 179 while (nz--) { 180 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 181 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 182 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 183 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 184 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 185 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 186 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 187 vj++; tp = t + (*vj)*7; 188 v += 49; 189 } 190 191 /* xk = inv(Dk)*(Dk*xk) */ 192 d = aa+k*49; /* ptr to inv(Dk) */ 193 tp = t + k*7; 194 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 195 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 196 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 197 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 198 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 199 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 200 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 201 } 202 203 /* solve U*x = y by back substitution */ 204 for (k=mbs-1; k>=0; k--){ 205 v = aa + 49*ai[k]; 206 vj = aj + ai[k]; 207 tp = t + k*7; 208 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 209 nz = ai[k+1] - ai[k]; 210 211 tp = t + (*vj)*7; 212 while (nz--) { 213 /* xk += U(k,:)*x(:) */ 214 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 215 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 216 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 217 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 218 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 219 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 220 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 221 vj++; tp = t + (*vj)*7; 222 v += 49; 223 } 224 tp = t + k*7; 225 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 226 idx = 7*r[k]; 227 x[idx] = x0; 228 x[idx+1] = x1; 229 x[idx+2] = x2; 230 x[idx+3] = x3; 231 x[idx+4] = x4; 232 x[idx+5] = x5; 233 x[idx+6] = x6; 234 } 235 236 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 237 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 238 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 239 PetscLogFlops(49*(2*a->nz + mbs)); 240 PetscFunctionReturn(0); 241 } 242 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering" 245 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 246 { 247 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 248 PetscErrorCode ierr; 249 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 250 MatScalar *aa=a->a,*v,*d; 251 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4,x5,x6; 252 PetscInt nz,*vj,k; 253 254 PetscFunctionBegin; 255 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 256 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 257 258 /* solve U^T * D * y = b by forward substitution */ 259 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 260 for (k=0; k<mbs; k++){ 261 v = aa + 49*ai[k]; 262 xp = x + k*7; 263 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 264 nz = ai[k+1] - ai[k]; 265 vj = aj + ai[k]; 266 xp = x + (*vj)*7; 267 while (nz--) { 268 /* x(:) += U(k,:)^T*(Dk*xk) */ 269 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 270 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 271 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 272 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 273 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 274 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 275 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 276 vj++; xp = x + (*vj)*7; 277 v += 49; 278 } 279 /* xk = inv(Dk)*(Dk*xk) */ 280 d = aa+k*49; /* ptr to inv(Dk) */ 281 xp = x + k*7; 282 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 283 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 284 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 285 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 286 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 287 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 288 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 289 } 290 291 /* solve U*x = y by back substitution */ 292 for (k=mbs-1; k>=0; k--){ 293 v = aa + 49*ai[k]; 294 xp = x + k*7; 295 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 296 nz = ai[k+1] - ai[k]; 297 vj = aj + ai[k]; 298 xp = x + (*vj)*7; 299 while (nz--) { 300 /* xk += U(k,:)*x(:) */ 301 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 302 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 303 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 304 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 305 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 306 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 307 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 308 vj++; 309 v += 49; xp = x + (*vj)*7; 310 } 311 xp = x + k*7; 312 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 313 } 314 315 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 316 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 317 PetscLogFlops(49*(2*a->nz + mbs)); 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatSolve_SeqSBAIJ_6" 323 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 324 { 325 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 326 IS isrow=a->row; 327 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 328 PetscErrorCode ierr; 329 PetscInt nz,*vj,k,*r,idx; 330 MatScalar *aa=a->a,*v,*d; 331 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 332 333 PetscFunctionBegin; 334 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 335 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 336 t = a->solve_work; 337 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 338 339 /* solve U^T * D * y = b by forward substitution */ 340 tp = t; 341 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 342 idx = 6*r[k]; 343 tp[0] = b[idx]; 344 tp[1] = b[idx+1]; 345 tp[2] = b[idx+2]; 346 tp[3] = b[idx+3]; 347 tp[4] = b[idx+4]; 348 tp[5] = b[idx+5]; 349 tp += 6; 350 } 351 352 for (k=0; k<mbs; k++){ 353 v = aa + 36*ai[k]; 354 vj = aj + ai[k]; 355 tp = t + k*6; 356 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 357 nz = ai[k+1] - ai[k]; 358 tp = t + (*vj)*6; 359 while (nz--) { 360 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 361 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 362 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 363 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 364 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 365 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 366 vj++; tp = t + (*vj)*6; 367 v += 36; 368 } 369 370 /* xk = inv(Dk)*(Dk*xk) */ 371 d = aa+k*36; /* ptr to inv(Dk) */ 372 tp = t + k*6; 373 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 374 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 375 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 376 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 377 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 378 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 379 } 380 381 /* solve U*x = y by back substitution */ 382 for (k=mbs-1; k>=0; k--){ 383 v = aa + 36*ai[k]; 384 vj = aj + ai[k]; 385 tp = t + k*6; 386 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 387 nz = ai[k+1] - ai[k]; 388 389 tp = t + (*vj)*6; 390 while (nz--) { 391 /* xk += U(k,:)*x(:) */ 392 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 393 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 394 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 395 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 396 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 397 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 398 vj++; tp = t + (*vj)*6; 399 v += 36; 400 } 401 tp = t + k*6; 402 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 403 idx = 6*r[k]; 404 x[idx] = x0; 405 x[idx+1] = x1; 406 x[idx+2] = x2; 407 x[idx+3] = x3; 408 x[idx+4] = x4; 409 x[idx+5] = x5; 410 } 411 412 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 413 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 414 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 415 PetscLogFlops(36*(2*a->nz + mbs)); 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering" 421 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 422 { 423 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 424 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 425 MatScalar *aa=a->a,*v,*d; 426 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4,x5; 427 PetscErrorCode ierr; 428 PetscInt nz,*vj,k; 429 430 PetscFunctionBegin; 431 432 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 433 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 434 435 /* solve U^T * D * y = b by forward substitution */ 436 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 437 for (k=0; k<mbs; k++){ 438 v = aa + 36*ai[k]; 439 xp = x + k*6; 440 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 441 nz = ai[k+1] - ai[k]; 442 vj = aj + ai[k]; 443 xp = x + (*vj)*6; 444 while (nz--) { 445 /* x(:) += U(k,:)^T*(Dk*xk) */ 446 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 447 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 448 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 449 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 450 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 451 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 452 vj++; xp = x + (*vj)*6; 453 v += 36; 454 } 455 /* xk = inv(Dk)*(Dk*xk) */ 456 d = aa+k*36; /* ptr to inv(Dk) */ 457 xp = x + k*6; 458 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 459 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 460 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 461 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 462 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 463 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 464 } 465 466 /* solve U*x = y by back substitution */ 467 for (k=mbs-1; k>=0; k--){ 468 v = aa + 36*ai[k]; 469 xp = x + k*6; 470 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 471 nz = ai[k+1] - ai[k]; 472 vj = aj + ai[k]; 473 xp = x + (*vj)*6; 474 while (nz--) { 475 /* xk += U(k,:)*x(:) */ 476 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 477 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 478 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 479 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 480 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 481 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 482 vj++; 483 v += 36; xp = x + (*vj)*6; 484 } 485 xp = x + k*6; 486 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 487 } 488 489 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 490 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 491 PetscLogFlops(36*(2*a->nz + mbs)); 492 PetscFunctionReturn(0); 493 } 494 495 #undef __FUNCT__ 496 #define __FUNCT__ "MatSolve_SeqSBAIJ_5" 497 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 498 { 499 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 500 IS isrow=a->row; 501 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 502 PetscErrorCode ierr; 503 PetscInt nz,*vj,k,*r,idx; 504 MatScalar *aa=a->a,*v,*diag; 505 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 506 507 PetscFunctionBegin; 508 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 509 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 510 t = a->solve_work; 511 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 512 513 /* solve U^T * D * y = b by forward substitution */ 514 tp = t; 515 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 516 idx = 5*r[k]; 517 tp[0] = b[idx]; 518 tp[1] = b[idx+1]; 519 tp[2] = b[idx+2]; 520 tp[3] = b[idx+3]; 521 tp[4] = b[idx+4]; 522 tp += 5; 523 } 524 525 for (k=0; k<mbs; k++){ 526 v = aa + 25*ai[k]; 527 vj = aj + ai[k]; 528 tp = t + k*5; 529 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 530 nz = ai[k+1] - ai[k]; 531 532 tp = t + (*vj)*5; 533 while (nz--) { 534 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 535 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 536 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 537 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 538 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 539 vj++; tp = t + (*vj)*5; 540 v += 25; 541 } 542 543 /* xk = inv(Dk)*(Dk*xk) */ 544 diag = aa+k*25; /* ptr to inv(Dk) */ 545 tp = t + k*5; 546 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 547 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 548 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 549 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 550 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 551 } 552 553 /* solve U*x = y by back substitution */ 554 for (k=mbs-1; k>=0; k--){ 555 v = aa + 25*ai[k]; 556 vj = aj + ai[k]; 557 tp = t + k*5; 558 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 559 nz = ai[k+1] - ai[k]; 560 561 tp = t + (*vj)*5; 562 while (nz--) { 563 /* xk += U(k,:)*x(:) */ 564 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 565 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 566 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 567 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 568 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 569 vj++; tp = t + (*vj)*5; 570 v += 25; 571 } 572 tp = t + k*5; 573 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 574 idx = 5*r[k]; 575 x[idx] = x0; 576 x[idx+1] = x1; 577 x[idx+2] = x2; 578 x[idx+3] = x3; 579 x[idx+4] = x4; 580 } 581 582 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 583 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 584 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 585 PetscLogFlops(25*(2*a->nz + mbs)); 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering" 591 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 592 { 593 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 594 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 595 MatScalar *aa=a->a,*v,*diag; 596 PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4; 597 PetscErrorCode ierr; 598 PetscInt nz,*vj,k; 599 600 PetscFunctionBegin; 601 602 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 603 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 604 605 /* solve U^T * D * y = b by forward substitution */ 606 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 607 for (k=0; k<mbs; k++){ 608 v = aa + 25*ai[k]; 609 xp = x + k*5; 610 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 611 nz = ai[k+1] - ai[k]; 612 vj = aj + ai[k]; 613 xp = x + (*vj)*5; 614 while (nz--) { 615 /* x(:) += U(k,:)^T*(Dk*xk) */ 616 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 617 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 618 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 619 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 620 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 621 vj++; xp = x + (*vj)*5; 622 v += 25; 623 } 624 /* xk = inv(Dk)*(Dk*xk) */ 625 diag = aa+k*25; /* ptr to inv(Dk) */ 626 xp = x + k*5; 627 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 628 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 629 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 630 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 631 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 632 } 633 634 /* solve U*x = y by back substitution */ 635 for (k=mbs-1; k>=0; k--){ 636 v = aa + 25*ai[k]; 637 xp = x + k*5; 638 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 639 nz = ai[k+1] - ai[k]; 640 vj = aj + ai[k]; 641 xp = x + (*vj)*5; 642 while (nz--) { 643 /* xk += U(k,:)*x(:) */ 644 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 645 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 646 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 647 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 648 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 649 vj++; 650 v += 25; xp = x + (*vj)*5; 651 } 652 xp = x + k*5; 653 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 654 } 655 656 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 657 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 658 PetscLogFlops(25*(2*a->nz + mbs)); 659 PetscFunctionReturn(0); 660 } 661 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatSolve_SeqSBAIJ_4" 664 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 665 { 666 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 667 IS isrow=a->row; 668 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 669 PetscErrorCode ierr; 670 PetscInt nz,*vj,k,*r,idx; 671 MatScalar *aa=a->a,*v,*diag; 672 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 673 674 PetscFunctionBegin; 675 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 676 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 677 t = a->solve_work; 678 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 679 680 /* solve U^T * D * y = b by forward substitution */ 681 tp = t; 682 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 683 idx = 4*r[k]; 684 tp[0] = b[idx]; 685 tp[1] = b[idx+1]; 686 tp[2] = b[idx+2]; 687 tp[3] = b[idx+3]; 688 tp += 4; 689 } 690 691 for (k=0; k<mbs; k++){ 692 v = aa + 16*ai[k]; 693 vj = aj + ai[k]; 694 tp = t + k*4; 695 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 696 nz = ai[k+1] - ai[k]; 697 698 tp = t + (*vj)*4; 699 while (nz--) { 700 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 701 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 702 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 703 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 704 vj++; tp = t + (*vj)*4; 705 v += 16; 706 } 707 708 /* xk = inv(Dk)*(Dk*xk) */ 709 diag = aa+k*16; /* ptr to inv(Dk) */ 710 tp = t + k*4; 711 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 712 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 713 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 714 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 715 } 716 717 /* solve U*x = y by back substitution */ 718 for (k=mbs-1; k>=0; k--){ 719 v = aa + 16*ai[k]; 720 vj = aj + ai[k]; 721 tp = t + k*4; 722 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 723 nz = ai[k+1] - ai[k]; 724 725 tp = t + (*vj)*4; 726 while (nz--) { 727 /* xk += U(k,:)*x(:) */ 728 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 729 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 730 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 731 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 732 vj++; tp = t + (*vj)*4; 733 v += 16; 734 } 735 tp = t + k*4; 736 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 737 idx = 4*r[k]; 738 x[idx] = x0; 739 x[idx+1] = x1; 740 x[idx+2] = x2; 741 x[idx+3] = x3; 742 } 743 744 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 745 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 746 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 747 PetscLogFlops(16*(2*a->nz + mbs)); 748 PetscFunctionReturn(0); 749 } 750 751 /* 752 Special case where the matrix was factored in the natural ordering. 753 This eliminates the need for the column and row permutation. 754 */ 755 #undef __FUNCT__ 756 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering" 757 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 758 { 759 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 760 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 761 MatScalar *aa=a->a,*v,*diag; 762 PetscScalar *x,*xp,*b,x0,x1,x2,x3; 763 PetscErrorCode ierr; 764 PetscInt nz,*vj,k; 765 766 PetscFunctionBegin; 767 768 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 769 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 770 771 /* solve U^T * D * y = b by forward substitution */ 772 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 773 for (k=0; k<mbs; k++){ 774 v = aa + 16*ai[k]; 775 xp = x + k*4; 776 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 777 nz = ai[k+1] - ai[k]; 778 vj = aj + ai[k]; 779 xp = x + (*vj)*4; 780 while (nz--) { 781 /* x(:) += U(k,:)^T*(Dk*xk) */ 782 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 783 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 784 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 785 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 786 vj++; xp = x + (*vj)*4; 787 v += 16; 788 } 789 /* xk = inv(Dk)*(Dk*xk) */ 790 diag = aa+k*16; /* ptr to inv(Dk) */ 791 xp = x + k*4; 792 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 793 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 794 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 795 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 796 } 797 798 /* solve U*x = y by back substitution */ 799 for (k=mbs-1; k>=0; k--){ 800 v = aa + 16*ai[k]; 801 xp = x + k*4; 802 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 803 nz = ai[k+1] - ai[k]; 804 vj = aj + ai[k]; 805 xp = x + (*vj)*4; 806 while (nz--) { 807 /* xk += U(k,:)*x(:) */ 808 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 809 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 810 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 811 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 812 vj++; 813 v += 16; xp = x + (*vj)*4; 814 } 815 xp = x + k*4; 816 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 817 } 818 819 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 820 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 821 PetscLogFlops(16*(2*a->nz + mbs)); 822 PetscFunctionReturn(0); 823 } 824 825 #undef __FUNCT__ 826 #define __FUNCT__ "MatSolve_SeqSBAIJ_3" 827 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 828 { 829 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 830 IS isrow=a->row; 831 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 832 PetscErrorCode ierr; 833 PetscInt nz,*vj,k,*r,idx; 834 MatScalar *aa=a->a,*v,*diag; 835 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 836 837 PetscFunctionBegin; 838 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 839 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 840 t = a->solve_work; 841 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 842 843 /* solve U^T * D * y = b by forward substitution */ 844 tp = t; 845 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 846 idx = 3*r[k]; 847 tp[0] = b[idx]; 848 tp[1] = b[idx+1]; 849 tp[2] = b[idx+2]; 850 tp += 3; 851 } 852 853 for (k=0; k<mbs; k++){ 854 v = aa + 9*ai[k]; 855 vj = aj + ai[k]; 856 tp = t + k*3; 857 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 858 nz = ai[k+1] - ai[k]; 859 860 tp = t + (*vj)*3; 861 while (nz--) { 862 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 863 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 864 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 865 vj++; tp = t + (*vj)*3; 866 v += 9; 867 } 868 869 /* xk = inv(Dk)*(Dk*xk) */ 870 diag = aa+k*9; /* ptr to inv(Dk) */ 871 tp = t + k*3; 872 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 873 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 874 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 875 } 876 877 /* solve U*x = y by back substitution */ 878 for (k=mbs-1; k>=0; k--){ 879 v = aa + 9*ai[k]; 880 vj = aj + ai[k]; 881 tp = t + k*3; 882 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 883 nz = ai[k+1] - ai[k]; 884 885 tp = t + (*vj)*3; 886 while (nz--) { 887 /* xk += U(k,:)*x(:) */ 888 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 889 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 890 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 891 vj++; tp = t + (*vj)*3; 892 v += 9; 893 } 894 tp = t + k*3; 895 tp[0] = x0; tp[1] = x1; tp[2] = x2; 896 idx = 3*r[k]; 897 x[idx] = x0; 898 x[idx+1] = x1; 899 x[idx+2] = x2; 900 } 901 902 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 903 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 904 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 905 PetscLogFlops(9*(2*a->nz + mbs)); 906 PetscFunctionReturn(0); 907 } 908 909 /* 910 Special case where the matrix was factored in the natural ordering. 911 This eliminates the need for the column and row permutation. 912 */ 913 #undef __FUNCT__ 914 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering" 915 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 916 { 917 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 918 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 919 MatScalar *aa=a->a,*v,*diag; 920 PetscScalar *x,*xp,*b,x0,x1,x2; 921 PetscErrorCode ierr; 922 PetscInt nz,*vj,k; 923 924 PetscFunctionBegin; 925 926 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 927 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 928 929 /* solve U^T * D * y = b by forward substitution */ 930 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 931 for (k=0; k<mbs; k++){ 932 v = aa + 9*ai[k]; 933 xp = x + k*3; 934 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 935 nz = ai[k+1] - ai[k]; 936 vj = aj + ai[k]; 937 xp = x + (*vj)*3; 938 while (nz--) { 939 /* x(:) += U(k,:)^T*(Dk*xk) */ 940 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 941 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 942 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 943 vj++; xp = x + (*vj)*3; 944 v += 9; 945 } 946 /* xk = inv(Dk)*(Dk*xk) */ 947 diag = aa+k*9; /* ptr to inv(Dk) */ 948 xp = x + k*3; 949 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 950 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 951 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 952 } 953 954 /* solve U*x = y by back substitution */ 955 for (k=mbs-1; k>=0; k--){ 956 v = aa + 9*ai[k]; 957 xp = x + k*3; 958 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 959 nz = ai[k+1] - ai[k]; 960 vj = aj + ai[k]; 961 xp = x + (*vj)*3; 962 while (nz--) { 963 /* xk += U(k,:)*x(:) */ 964 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 965 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 966 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 967 vj++; 968 v += 9; xp = x + (*vj)*3; 969 } 970 xp = x + k*3; 971 xp[0] = x0; xp[1] = x1; xp[2] = x2; 972 } 973 974 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 975 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 976 PetscLogFlops(9*(2*a->nz + mbs)); 977 PetscFunctionReturn(0); 978 } 979 980 #undef __FUNCT__ 981 #define __FUNCT__ "MatSolve_SeqSBAIJ_2" 982 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 983 { 984 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 985 IS isrow=a->row; 986 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 987 PetscErrorCode ierr; 988 PetscInt nz,*vj,k,k2,*r,idx; 989 MatScalar *aa=a->a,*v,*diag; 990 PetscScalar *x,*b,x0,x1,*t; 991 992 PetscFunctionBegin; 993 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 994 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 995 t = a->solve_work; 996 /* printf("called MatSolve_SeqSBAIJ_2\n"); */ 997 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 998 999 /* solve U^T * D * y = perm(b) by forward substitution */ 1000 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1001 idx = 2*r[k]; 1002 t[k*2] = b[idx]; 1003 t[k*2+1] = b[idx+1]; 1004 } 1005 for (k=0; k<mbs; k++){ 1006 v = aa + 4*ai[k]; 1007 vj = aj + ai[k]; 1008 k2 = k*2; 1009 x0 = t[k2]; x1 = t[k2+1]; 1010 nz = ai[k+1] - ai[k]; 1011 while (nz--) { 1012 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1013 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1014 vj++; v += 4; 1015 } 1016 diag = aa+k*4; /* ptr to inv(Dk) */ 1017 t[k2] = diag[0]*x0 + diag[2]*x1; 1018 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1019 } 1020 1021 /* solve U*x = y by back substitution */ 1022 for (k=mbs-1; k>=0; k--){ 1023 v = aa + 4*ai[k]; 1024 vj = aj + ai[k]; 1025 k2 = k*2; 1026 x0 = t[k2]; x1 = t[k2+1]; 1027 nz = ai[k+1] - ai[k]; 1028 while (nz--) { 1029 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1030 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1031 vj++; v += 4; 1032 } 1033 t[k2] = x0; 1034 t[k2+1] = x1; 1035 idx = 2*r[k]; 1036 x[idx] = x0; 1037 x[idx+1] = x1; 1038 } 1039 1040 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1041 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1042 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1043 PetscLogFlops(4*(2*a->nz + mbs)); 1044 PetscFunctionReturn(0); 1045 } 1046 1047 /* 1048 Special case where the matrix was factored in the natural ordering. 1049 This eliminates the need for the column and row permutation. 1050 */ 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering" 1053 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1054 { 1055 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1056 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1057 MatScalar *aa=a->a,*v,*diag; 1058 PetscScalar *x,*b,x0,x1; 1059 PetscErrorCode ierr; 1060 PetscInt nz,*vj,k,k2; 1061 1062 PetscFunctionBegin; 1063 1064 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1065 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1066 1067 /* solve U^T * D * y = b by forward substitution */ 1068 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1069 for (k=0; k<mbs; k++){ 1070 v = aa + 4*ai[k]; 1071 vj = aj + ai[k]; 1072 k2 = k*2; 1073 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1074 nz = ai[k+1] - ai[k]; 1075 1076 while (nz--) { 1077 /* x(:) += U(k,:)^T*(Dk*xk) */ 1078 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1079 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1080 vj++; v += 4; 1081 } 1082 /* xk = inv(Dk)*(Dk*xk) */ 1083 diag = aa+k*4; /* ptr to inv(Dk) */ 1084 x[k2] = diag[0]*x0 + diag[2]*x1; 1085 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1086 } 1087 1088 /* solve U*x = y by back substitution */ 1089 for (k=mbs-1; k>=0; k--){ 1090 v = aa + 4*ai[k]; 1091 vj = aj + ai[k]; 1092 k2 = k*2; 1093 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1094 nz = ai[k+1] - ai[k]; 1095 while (nz--) { 1096 /* xk += U(k,:)*x(:) */ 1097 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1098 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1099 vj++; v += 4; 1100 } 1101 x[k2] = x0; 1102 x[k2+1] = x1; 1103 } 1104 1105 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1106 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1107 PetscLogFlops(4*(2*a->nz + mbs)); /* bs2*(2*a->nz + mbs) */ 1108 PetscFunctionReturn(0); 1109 } 1110 1111 #undef __FUNCT__ 1112 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1113 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1114 { 1115 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1116 IS isrow=a->row; 1117 PetscErrorCode ierr; 1118 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rip; 1119 MatScalar *aa=a->a,*v; 1120 PetscScalar *x,*b,xk,*t; 1121 PetscInt nz,*vj,k; 1122 1123 PetscFunctionBegin; 1124 if (!mbs) PetscFunctionReturn(0); 1125 /* printf(" MatSolve_SeqSBAIJ_1 is called\n"); */ 1126 1127 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1128 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1129 t = a->solve_work; 1130 1131 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1132 1133 /* solve U^T*D*y = perm(b) by forward substitution */ 1134 for (k=0; k<mbs; k++) t[k] = b[rip[k]]; 1135 for (k=0; k<mbs; k++){ 1136 v = aa + ai[k] + 1; 1137 vj = aj + ai[k] + 1; 1138 xk = t[k]; 1139 nz = ai[k+1] - ai[k] - 1; 1140 while (nz--) t[*vj++] += (*v++) * xk; 1141 t[k] = xk*aa[ai[k]]; /* aa[k] = 1/D(k) */ 1142 } 1143 1144 /* solve U*x = y by back substitution */ 1145 for (k=mbs-1; k>=0; k--){ 1146 v = aa + ai[k] + 1; 1147 vj = aj + ai[k] + 1; 1148 xk = t[k]; 1149 nz = ai[k+1] - ai[k] - 1; 1150 while (nz--) xk += (*v++) * t[*vj++]; 1151 t[k] = xk; 1152 x[rip[k]] = xk; 1153 } 1154 1155 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1156 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1157 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1158 PetscLogFlops(4*a->nz + A->m); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1164 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1165 { 1166 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1167 PetscErrorCode ierr; 1168 1169 PetscFunctionBegin; 1170 if (A->bs == 1) { 1171 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1172 } else { 1173 IS isrow=a->row; 1174 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rip,i; 1175 MatScalar *aa=a->a,*v; 1176 PetscScalar *x,*b,*t; 1177 PetscInt nz,*vj,k,n; 1178 if (bb->n > a->solves_work_n) { 1179 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 1180 ierr = PetscMalloc(bb->n*A->m*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1181 a->solves_work_n = bb->n; 1182 } 1183 n = bb->n; 1184 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1185 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1186 t = a->solves_work; 1187 1188 ierr = ISGetIndices(isrow,&rip);CHKERRQ(ierr); 1189 1190 /* solve U^T*D*y = perm(b) by forward substitution */ 1191 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rip[k]+i*mbs];} /* values are stored interlaced in t */ 1192 for (k=0; k<mbs; k++){ 1193 v = aa + ai[k]; 1194 vj = aj + ai[k]; 1195 nz = ai[k+1] - ai[k]; 1196 while (nz--) { 1197 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1198 v++;vj++; 1199 } 1200 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1201 } 1202 1203 /* solve U*x = y by back substitution */ 1204 for (k=mbs-1; k>=0; k--){ 1205 v = aa + ai[k]; 1206 vj = aj + ai[k]; 1207 nz = ai[k+1] - ai[k]; 1208 while (nz--) { 1209 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1210 v++;vj++; 1211 } 1212 for (i=0; i<n; i++) x[rip[k]+i*mbs] = t[n*k+i]; 1213 } 1214 1215 ierr = ISRestoreIndices(isrow,&rip);CHKERRQ(ierr); 1216 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1217 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1218 PetscLogFlops(bb->n*(4*a->nz + A->m)); 1219 } 1220 PetscFunctionReturn(0); 1221 } 1222 1223 /* 1224 Special case where the matrix was ILU(0) factored in the natural 1225 ordering. This eliminates the need for the column and row permutation. 1226 */ 1227 #undef __FUNCT__ 1228 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1229 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1230 { 1231 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1232 PetscErrorCode ierr; 1233 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1234 MatScalar *aa=a->a,*v; 1235 PetscScalar *x,*b,xk; 1236 PetscInt nz,*vj,k; 1237 1238 PetscFunctionBegin; 1239 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1240 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1241 1242 /* solve U^T*D*y = b by forward substitution */ 1243 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1244 for (k=0; k<mbs; k++){ 1245 v = aa + ai[k] + 1; 1246 vj = aj + ai[k] + 1; 1247 xk = x[k]; 1248 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1249 while (nz--) x[*vj++] += (*v++) * xk; 1250 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1251 } 1252 1253 /* solve U*x = y by back substitution */ 1254 for (k=mbs-2; k>=0; k--){ 1255 v = aa + ai[k] + 1; 1256 vj = aj + ai[k] + 1; 1257 xk = x[k]; 1258 nz = ai[k+1] - ai[k] - 1; 1259 while (nz--) xk += (*v++) * x[*vj++]; 1260 x[k] = xk; 1261 } 1262 1263 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1264 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1265 PetscLogFlops(4*a->nz + A->m); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR" 1272 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B) 1273 { 1274 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 1275 PetscErrorCode ierr; 1276 PetscInt *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j; 1277 PetscInt *jutmp,bs = A->bs,bs2=a->bs2; 1278 PetscInt m,reallocs = 0,*levtmp; 1279 PetscInt *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 1280 PetscInt incrlev,*lev,shift,prow,nz; 1281 PetscReal f = info->fill,levels = info->levels; 1282 PetscTruth perm_identity; 1283 1284 PetscFunctionBegin; 1285 /* check whether perm is the identity mapping */ 1286 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1287 1288 if (perm_identity){ 1289 a->permute = PETSC_FALSE; 1290 ai = a->i; aj = a->j; 1291 } else { /* non-trivial permutation */ 1292 a->permute = PETSC_TRUE; 1293 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 1294 ai = a->inew; aj = a->jnew; 1295 } 1296 1297 /* initialization */ 1298 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1299 umax = (PetscInt)(f*ai[mbs] + 1); 1300 ierr = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr); 1301 umax += mbs + 1; 1302 shift = mbs + 1; 1303 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 1304 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 1305 iu[0] = mbs + 1; 1306 juidx = mbs + 1; 1307 /* prowl: linked list for pivot row */ 1308 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr); 1309 /* q: linked list for col index */ 1310 q = prowl + mbs; 1311 levtmp = q + mbs; 1312 1313 for (i=0; i<mbs; i++){ 1314 prowl[i] = mbs; 1315 q[i] = 0; 1316 } 1317 1318 /* for each row k */ 1319 for (k=0; k<mbs; k++){ 1320 nzk = 0; 1321 q[k] = mbs; 1322 /* copy current row into linked list */ 1323 nz = ai[rip[k]+1] - ai[rip[k]]; 1324 j = ai[rip[k]]; 1325 while (nz--){ 1326 vj = rip[aj[j++]]; 1327 if (vj > k){ 1328 qm = k; 1329 do { 1330 m = qm; qm = q[m]; 1331 } while(qm < vj); 1332 if (qm == vj) { 1333 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 1334 } 1335 nzk++; 1336 q[m] = vj; 1337 q[vj] = qm; 1338 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1339 } 1340 } 1341 1342 /* modify nonzero structure of k-th row by computing fill-in 1343 for each row prow to be merged in */ 1344 prow = k; 1345 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 1346 1347 while (prow < k){ 1348 /* merge row prow into k-th row */ 1349 jmin = iu[prow] + 1; 1350 jmax = iu[prow+1]; 1351 qm = k; 1352 for (j=jmin; j<jmax; j++){ 1353 incrlev = lev[j-shift] + 1; 1354 if (incrlev > levels) continue; 1355 vj = ju[j]; 1356 do { 1357 m = qm; qm = q[m]; 1358 } while (qm < vj); 1359 if (qm != vj){ /* a fill */ 1360 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1361 levtmp[vj] = incrlev; 1362 } else { 1363 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1364 } 1365 } 1366 prow = prowl[prow]; /* next pivot row */ 1367 } 1368 1369 /* add k to row list for first nonzero element in k-th row */ 1370 if (nzk > 1){ 1371 i = q[k]; /* col value of first nonzero element in k_th row of U */ 1372 prowl[k] = prowl[i]; prowl[i] = k; 1373 } 1374 iu[k+1] = iu[k] + nzk; 1375 1376 /* allocate more space to ju and lev if needed */ 1377 if (iu[k+1] > umax) { 1378 /* estimate how much additional space we will need */ 1379 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1380 /* just double the memory each time */ 1381 maxadd = umax; 1382 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 1383 umax += maxadd; 1384 1385 /* allocate a longer ju */ 1386 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 1387 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 1388 ierr = PetscFree(ju);CHKERRQ(ierr); 1389 ju = jutmp; 1390 1391 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 1392 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr); 1393 ierr = PetscFree(lev);CHKERRQ(ierr); 1394 lev = jutmp; 1395 reallocs += 2; /* count how many times we realloc */ 1396 } 1397 1398 /* save nonzero structure of k-th row in ju */ 1399 i=k; 1400 while (nzk --) { 1401 i = q[i]; 1402 ju[juidx] = i; 1403 lev[juidx-shift] = levtmp[i]; 1404 juidx++; 1405 } 1406 } 1407 1408 if (ai[mbs] != 0) { 1409 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1410 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1411 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Run with -pc_icc_fill %g or use \n",af); 1412 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:PCICCSetFill(pc,%g);\n",af); 1413 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:for best performance.\n"); 1414 } else { 1415 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 1416 } 1417 1418 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1419 ierr = PetscFree(prowl);CHKERRQ(ierr); 1420 ierr = PetscFree(lev);CHKERRQ(ierr); 1421 1422 /* put together the new matrix */ 1423 ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr); 1424 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 1425 ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 1426 1427 /* PetscLogObjectParent(*B,iperm); */ 1428 b = (Mat_SeqSBAIJ*)(*B)->data; 1429 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1430 b->singlemalloc = PETSC_FALSE; 1431 /* the next line frees the default space generated by the Create() */ 1432 ierr = PetscFree(b->a);CHKERRQ(ierr); 1433 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1434 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 1435 b->j = ju; 1436 b->i = iu; 1437 b->diag = 0; 1438 b->ilen = 0; 1439 b->imax = 0; 1440 1441 if (b->row) { 1442 ierr = ISDestroy(b->row);CHKERRQ(ierr); 1443 } 1444 if (b->icol) { 1445 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 1446 } 1447 b->row = perm; 1448 b->icol = perm; 1449 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1450 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1451 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1452 /* In b structure: Free imax, ilen, old a, old j. 1453 Allocate idnew, solve_work, new a, new j */ 1454 ierr = PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1455 b->maxnz = b->nz = iu[mbs]; 1456 1457 (*B)->factor = FACTOR_CHOLESKY; 1458 (*B)->info.factor_mallocs = reallocs; 1459 (*B)->info.fill_ratio_given = f; 1460 if (ai[mbs] != 0) { 1461 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 1462 } else { 1463 (*B)->info.fill_ratio_needed = 0.0; 1464 } 1465 1466 if (perm_identity){ 1467 switch (bs) { 1468 case 1: 1469 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1470 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1471 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1472 (*B)->ops->solves = MatSolves_SeqSBAIJ_1; 1473 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n"); 1474 break; 1475 case 2: 1476 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1477 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1478 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1479 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1480 break; 1481 case 3: 1482 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1483 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1484 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1485 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 1486 break; 1487 case 4: 1488 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1489 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1490 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1491 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1492 break; 1493 case 5: 1494 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1495 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1496 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1497 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1498 break; 1499 case 6: 1500 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1501 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1502 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1503 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1504 break; 1505 case 7: 1506 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1507 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1508 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1509 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1510 break; 1511 default: 1512 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 1513 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 1514 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 1515 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 1516 break; 1517 } 1518 } 1519 1520 PetscFunctionReturn(0); 1521 } 1522 1523 #include "petscbt.h" 1524 #include "src/mat/utils/freespace.h" 1525 #undef __FUNCT__ 1526 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 1527 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1528 { 1529 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1530 Mat_SeqSBAIJ *b; 1531 Mat B; 1532 PetscErrorCode ierr; 1533 PetscTruth perm_identity; 1534 PetscInt bs=A->bs,am=a->mbs; 1535 PetscInt reallocs=0,*rip,i,*ai,*aj,*ui; 1536 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1537 PetscInt nlnk,*lnk,*lnk_lvl,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1538 PetscReal fill=info->fill,levels=info->levels; 1539 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1540 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1541 PetscBT lnkbt; 1542 1543 PetscFunctionBegin; 1544 /* 1545 This code originally uses Modified Sparse Row (MSR) storage 1546 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise! 1547 Then it is rewritten so the factor B takes seqsbaij format. However the associated 1548 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1, 1549 thus the original code in MSR format is still used for these cases. 1550 The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever 1551 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 1552 */ 1553 if (bs > 1){ 1554 ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);CHKERRQ(ierr); 1555 PetscFunctionReturn(0); 1556 } 1557 1558 /* check whether perm is the identity mapping */ 1559 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1560 1561 /* special case that simply copies fill pattern */ 1562 if (!levels && perm_identity) { 1563 a->permute = PETSC_FALSE; 1564 ai = a->i; aj = a->j; 1565 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1566 ierr = PetscMemcpy(ui,ai,(am+1)*sizeof(PetscInt));CHKERRQ(ierr); 1567 ierr = PetscMalloc(ai[am]*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1568 ierr = PetscMemcpy(uj,aj,ai[am]*sizeof(PetscInt));CHKERRQ(ierr); 1569 1570 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1571 if (perm_identity){ 1572 a->permute = PETSC_FALSE; 1573 ai = a->i; aj = a->j; 1574 } else { /* non-trivial permutation */ 1575 a->permute = PETSC_TRUE; 1576 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 1577 ai = a->inew; aj = a->jnew; 1578 } 1579 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1580 1581 /* initialization */ 1582 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1583 ui[0] = 0; 1584 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 1585 1586 /* jl: linked list for storing indices of the pivot rows 1587 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1588 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1589 il = jl + am; 1590 uj_ptr = (PetscInt**)(il + am); 1591 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1592 for (i=0; i<am; i++){ 1593 jl[i] = am; il[i] = 0; 1594 } 1595 1596 /* create and initialize a linked list for storing column indices of the active row k */ 1597 nlnk = am + 1; 1598 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1599 1600 /* initial FreeSpace size is fill*(ai[am]+1) */ 1601 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1602 current_space = free_space; 1603 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1604 current_space_lvl = free_space_lvl; 1605 1606 for (k=0; k<am; k++){ /* for each active row k */ 1607 /* initialize lnk by the column indices of row rip[k] */ 1608 nzk = 0; 1609 ncols = ai[rip[k]+1] - ai[rip[k]]; 1610 cols = cols_lvl + am; 1611 for (j=0; j<ncols; j++){ 1612 i = *(aj + ai[rip[k]] + j); 1613 cols[j] = rip[i]; 1614 cols_lvl[j] = -1; /* initialize level for nonzero entries */ 1615 } 1616 ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1617 nzk += nlnk; 1618 1619 /* update lnk by computing fill-in for each pivot row to be merged in */ 1620 prow = jl[k]; /* 1st pivot row */ 1621 1622 while (prow < k){ 1623 nextprow = jl[prow]; 1624 1625 /* merge prow into k-th row */ 1626 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1627 jmax = ui[prow+1]; 1628 ncols = jmax-jmin; 1629 i = jmin - ui[prow]; 1630 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1631 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1632 ierr = PetscIncompleteLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1633 nzk += nlnk; 1634 1635 /* update il and jl for prow */ 1636 if (jmin < jmax){ 1637 il[prow] = jmin; 1638 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1639 } 1640 prow = nextprow; 1641 } 1642 1643 /* if free space is not available, make more free space */ 1644 if (current_space->local_remaining<nzk) { 1645 i = am - k + 1; /* num of unfactored rows */ 1646 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1647 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1648 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1649 reallocs++; 1650 } 1651 1652 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1653 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1654 1655 /* add the k-th row into il and jl */ 1656 if (nzk-1 > 0){ 1657 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1658 jl[k] = jl[i]; jl[i] = k; 1659 il[k] = ui[k] + 1; 1660 } 1661 uj_ptr[k] = current_space->array; 1662 uj_lvl_ptr[k] = current_space_lvl->array; 1663 1664 current_space->array += nzk; 1665 current_space->local_used += nzk; 1666 current_space->local_remaining -= nzk; 1667 current_space_lvl->array += nzk; 1668 current_space_lvl->local_used += nzk; 1669 current_space_lvl->local_remaining -= nzk; 1670 1671 ui[k+1] = ui[k] + nzk; 1672 } 1673 1674 if (ai[am] != 0) { 1675 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1676 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1677 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1678 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1679 } else { 1680 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1681 } 1682 1683 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1684 ierr = PetscFree(jl);CHKERRQ(ierr); 1685 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1686 1687 /* destroy list of free space and other temporary array(s) */ 1688 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1689 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1690 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1691 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1692 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1693 1694 /* put together the new matrix in MATSEQSBAIJ format */ 1695 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1696 B = *fact; 1697 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1698 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,PETSC_NULL);CHKERRQ(ierr); 1699 1700 b = (Mat_SeqSBAIJ*)B->data; 1701 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1702 b->singlemalloc = PETSC_FALSE; 1703 /* the next line frees the default space generated by the Create() */ 1704 ierr = PetscFree(b->a);CHKERRQ(ierr); 1705 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1706 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1707 b->j = uj; 1708 b->i = ui; 1709 b->diag = 0; 1710 b->ilen = 0; 1711 b->imax = 0; 1712 b->row = perm; 1713 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1714 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1715 b->icol = perm; 1716 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1717 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1718 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1719 b->maxnz = b->nz = ui[am]; 1720 1721 B->factor = FACTOR_CHOLESKY; 1722 B->info.factor_mallocs = reallocs; 1723 B->info.fill_ratio_given = fill; 1724 if (ai[am] != 0) { 1725 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1726 } else { 1727 B->info.fill_ratio_needed = 0.0; 1728 } 1729 1730 if (perm_identity){ 1731 switch (bs) { 1732 case 1: 1733 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1734 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1735 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1736 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJl:Using special in-place natural ordering factor and solve BS=1\n"); 1737 break; 1738 case 2: 1739 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1740 B->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1741 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1742 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1743 break; 1744 case 3: 1745 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1746 B->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1747 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1748 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 1749 break; 1750 case 4: 1751 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1752 B->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1753 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1754 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1755 break; 1756 case 5: 1757 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1758 B->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1759 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1760 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1761 break; 1762 case 6: 1763 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1764 B->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1765 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1766 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1767 break; 1768 case 7: 1769 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1770 B->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1771 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1772 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1773 break; 1774 default: 1775 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 1776 B->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 1777 B->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 1778 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 1779 break; 1780 } 1781 } 1782 PetscFunctionReturn(0); 1783 } 1784 1785