1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for SBAIJ format. 5 */ 6 7 #include "src/mat/impls/sbaij/seq/sbaij.h" 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/inline/ilu.h" 10 #include "src/inline/dot.h" 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatSolve_SeqSBAIJ_N" 14 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 15 { 16 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 17 IS isrow=a->row; 18 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 19 PetscErrorCode ierr; 20 PetscInt nz,*vj,k,*r,idx,k1; 21 PetscInt bs=A->rmap.bs,bs2 = a->bs2; 22 MatScalar *aa=a->a,*v,*diag; 23 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 24 25 PetscFunctionBegin; 26 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 27 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 28 t = a->solve_work; 29 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 30 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 31 32 /* solve U^T * D * y = b by forward substitution */ 33 xk = t; 34 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 35 idx = bs*r[k]; 36 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 37 } 38 for (k=0; k<mbs; k++){ 39 v = aa + bs2*ai[k]; 40 xk = t + k*bs; /* Dk*xk = k-th block of x */ 41 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 42 nz = ai[k+1] - ai[k]; 43 vj = aj + ai[k]; 44 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 45 while (nz--) { 46 /* x(:) += U(k,:)^T*(Dk*xk) */ 47 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 48 vj++; xj = t + (*vj)*bs; 49 v += bs2; 50 } 51 /* xk = inv(Dk)*(Dk*xk) */ 52 diag = aa+k*bs2; /* ptr to inv(Dk) */ 53 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 54 } 55 56 /* solve U*x = y by back substitution */ 57 for (k=mbs-1; k>=0; k--){ 58 v = aa + bs2*ai[k]; 59 xk = t + k*bs; /* xk */ 60 nz = ai[k+1] - ai[k]; 61 vj = aj + ai[k]; 62 xj = t + (*vj)*bs; 63 while (nz--) { 64 /* xk += U(k,:)*x(:) */ 65 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 66 vj++; 67 v += bs2; xj = t + (*vj)*bs; 68 } 69 idx = bs*r[k]; 70 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 71 } 72 73 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 74 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 75 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 76 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 77 ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N" 83 PetscErrorCode MatForwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 84 { 85 PetscFunctionBegin; 86 SETERRQ(1,"not implemented yet"); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N" 92 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 93 { 94 PetscFunctionBegin; 95 SETERRQ(1,"not implemented yet"); 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 101 PetscErrorCode ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 102 { 103 PetscErrorCode ierr; 104 PetscInt nz,*vj,k; 105 PetscInt bs2 = bs*bs; 106 MatScalar *v,*diag; 107 PetscScalar *xk,*xj,*xk_tmp; 108 109 PetscFunctionBegin; 110 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 111 for (k=0; k<mbs; k++){ 112 v = aa + bs2*ai[k]; 113 xk = x + k*bs; /* Dk*xk = k-th block of x */ 114 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 115 nz = ai[k+1] - ai[k]; 116 vj = aj + ai[k]; 117 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 118 while (nz--) { 119 /* x(:) += U(k,:)^T*(Dk*xk) */ 120 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 121 vj++; xj = x + (*vj)*bs; 122 v += bs2; 123 } 124 /* xk = inv(Dk)*(Dk*xk) */ 125 diag = aa+k*bs2; /* ptr to inv(Dk) */ 126 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 127 } 128 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 134 PetscErrorCode BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 135 { 136 PetscInt nz,*vj,k; 137 PetscInt bs2 = bs*bs; 138 MatScalar *v; 139 PetscScalar *xk,*xj; 140 141 PetscFunctionBegin; 142 for (k=mbs-1; k>=0; k--){ 143 v = aa + bs2*ai[k]; 144 xk = x + k*bs; /* xk */ 145 nz = ai[k+1] - ai[k]; 146 vj = aj + ai[k]; 147 xj = x + (*vj)*bs; 148 while (nz--) { 149 /* xk += U(k,:)*x(:) */ 150 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 151 vj++; 152 v += bs2; xj = x + (*vj)*bs; 153 } 154 } 155 PetscFunctionReturn(0); 156 } 157 158 #undef __FUNCT__ 159 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering" 160 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 161 { 162 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 163 PetscErrorCode ierr; 164 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 165 PetscInt bs=A->rmap.bs,bs2 = a->bs2; 166 MatScalar *aa=a->a; 167 PetscScalar *x,*b; 168 169 PetscFunctionBegin; 170 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 171 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 172 173 /* solve U^T * D * y = b by forward substitution */ 174 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 175 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 176 177 /* solve U*x = y by back substitution */ 178 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 179 180 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 181 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 182 ierr = PetscLogFlops(bs2*(2*a->nz + mbs));CHKERRQ(ierr); 183 PetscFunctionReturn(0); 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering" 188 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 189 { 190 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 191 PetscErrorCode ierr; 192 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 193 PetscInt bs=A->rmap.bs,bs2 = a->bs2; 194 MatScalar *aa=a->a; 195 PetscScalar *x,*b; 196 197 PetscFunctionBegin; 198 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 199 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 200 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 201 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 202 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 203 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 204 ierr = PetscLogFlops(bs2*a->nz + A->rmap.N);CHKERRQ(ierr); 205 PetscFunctionReturn(0); 206 } 207 208 #undef __FUNCT__ 209 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering" 210 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 211 { 212 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 213 PetscErrorCode ierr; 214 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 215 PetscInt bs=A->rmap.bs,bs2 = a->bs2; 216 MatScalar *aa=a->a; 217 PetscScalar *x,*b; 218 219 PetscFunctionBegin; 220 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 221 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 222 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 223 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 224 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 225 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 226 ierr = PetscLogFlops(bs2*a->nz);CHKERRQ(ierr); 227 PetscFunctionReturn(0); 228 } 229 230 #undef __FUNCT__ 231 #define __FUNCT__ "MatSolve_SeqSBAIJ_7" 232 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 233 { 234 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 235 IS isrow=a->row; 236 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 237 PetscErrorCode ierr; 238 PetscInt nz,*vj,k,*r,idx; 239 MatScalar *aa=a->a,*v,*d; 240 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 241 242 PetscFunctionBegin; 243 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 244 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 245 t = a->solve_work; 246 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 247 248 /* solve U^T * D * y = b by forward substitution */ 249 tp = t; 250 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 251 idx = 7*r[k]; 252 tp[0] = b[idx]; 253 tp[1] = b[idx+1]; 254 tp[2] = b[idx+2]; 255 tp[3] = b[idx+3]; 256 tp[4] = b[idx+4]; 257 tp[5] = b[idx+5]; 258 tp[6] = b[idx+6]; 259 tp += 7; 260 } 261 262 for (k=0; k<mbs; k++){ 263 v = aa + 49*ai[k]; 264 vj = aj + ai[k]; 265 tp = t + k*7; 266 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 267 nz = ai[k+1] - ai[k]; 268 tp = t + (*vj)*7; 269 while (nz--) { 270 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 271 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 272 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 273 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 274 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 275 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 276 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 277 vj++; tp = t + (*vj)*7; 278 v += 49; 279 } 280 281 /* xk = inv(Dk)*(Dk*xk) */ 282 d = aa+k*49; /* ptr to inv(Dk) */ 283 tp = t + k*7; 284 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 285 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 286 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 287 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 288 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 289 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 290 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 291 } 292 293 /* solve U*x = y by back substitution */ 294 for (k=mbs-1; k>=0; k--){ 295 v = aa + 49*ai[k]; 296 vj = aj + ai[k]; 297 tp = t + k*7; 298 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 299 nz = ai[k+1] - ai[k]; 300 301 tp = t + (*vj)*7; 302 while (nz--) { 303 /* xk += U(k,:)*x(:) */ 304 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 305 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 306 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 307 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 308 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 309 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 310 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 311 vj++; tp = t + (*vj)*7; 312 v += 49; 313 } 314 tp = t + k*7; 315 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 316 idx = 7*r[k]; 317 x[idx] = x0; 318 x[idx+1] = x1; 319 x[idx+2] = x2; 320 x[idx+3] = x3; 321 x[idx+4] = x4; 322 x[idx+5] = x5; 323 x[idx+6] = x6; 324 } 325 326 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 327 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 328 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 329 ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr); 330 PetscFunctionReturn(0); 331 } 332 333 #undef __FUNCT__ 334 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 335 PetscErrorCode ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 336 { 337 MatScalar *v,*d; 338 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 339 PetscInt nz,*vj,k; 340 341 PetscFunctionBegin; 342 for (k=0; k<mbs; k++){ 343 v = aa + 49*ai[k]; 344 xp = x + k*7; 345 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 346 nz = ai[k+1] - ai[k]; 347 vj = aj + ai[k]; 348 xp = x + (*vj)*7; 349 while (nz--) { 350 /* x(:) += U(k,:)^T*(Dk*xk) */ 351 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 352 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 353 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 354 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 355 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 356 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 357 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 358 vj++; xp = x + (*vj)*7; 359 v += 49; 360 } 361 /* xk = inv(Dk)*(Dk*xk) */ 362 d = aa+k*49; /* ptr to inv(Dk) */ 363 xp = x + k*7; 364 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 365 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 366 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 367 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 368 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 369 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 370 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 371 } 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 377 PetscErrorCode BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 378 { 379 MatScalar *v; 380 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 381 PetscInt nz,*vj,k; 382 383 PetscFunctionBegin; 384 for (k=mbs-1; k>=0; k--){ 385 v = aa + 49*ai[k]; 386 xp = x + k*7; 387 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 388 nz = ai[k+1] - ai[k]; 389 vj = aj + ai[k]; 390 xp = x + (*vj)*7; 391 while (nz--) { 392 /* xk += U(k,:)*x(:) */ 393 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 394 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 395 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 396 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 397 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 398 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 399 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 400 vj++; 401 v += 49; xp = x + (*vj)*7; 402 } 403 xp = x + k*7; 404 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 405 } 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering" 411 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 412 { 413 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 414 PetscErrorCode ierr; 415 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 416 MatScalar *aa=a->a; 417 PetscScalar *x,*b; 418 419 PetscFunctionBegin; 420 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 421 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 422 423 /* solve U^T * D * y = b by forward substitution */ 424 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 425 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 426 427 /* solve U*x = y by back substitution */ 428 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 429 430 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 431 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 432 ierr = PetscLogFlops(49*(2*a->nz + mbs));CHKERRQ(ierr); 433 PetscFunctionReturn(0); 434 } 435 436 #undef __FUNCT__ 437 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering" 438 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 439 { 440 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 441 PetscErrorCode ierr; 442 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 443 MatScalar *aa=a->a; 444 PetscScalar *x,*b; 445 446 PetscFunctionBegin; 447 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 448 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 449 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 450 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 451 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 452 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 453 ierr = PetscLogFlops(49*a->nz + mbs);CHKERRQ(ierr); 454 PetscFunctionReturn(0); 455 } 456 457 #undef __FUNCT__ 458 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering" 459 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 460 { 461 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 462 PetscErrorCode ierr; 463 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 464 MatScalar *aa=a->a; 465 PetscScalar *x,*b; 466 467 PetscFunctionBegin; 468 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 469 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 470 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 471 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 472 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 473 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 474 ierr = PetscLogFlops(49*a->nz);CHKERRQ(ierr); 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "MatSolve_SeqSBAIJ_6" 480 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 481 { 482 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 483 IS isrow=a->row; 484 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 485 PetscErrorCode ierr; 486 PetscInt nz,*vj,k,*r,idx; 487 MatScalar *aa=a->a,*v,*d; 488 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 489 490 PetscFunctionBegin; 491 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 492 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 493 t = a->solve_work; 494 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 495 496 /* solve U^T * D * y = b by forward substitution */ 497 tp = t; 498 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 499 idx = 6*r[k]; 500 tp[0] = b[idx]; 501 tp[1] = b[idx+1]; 502 tp[2] = b[idx+2]; 503 tp[3] = b[idx+3]; 504 tp[4] = b[idx+4]; 505 tp[5] = b[idx+5]; 506 tp += 6; 507 } 508 509 for (k=0; k<mbs; k++){ 510 v = aa + 36*ai[k]; 511 vj = aj + ai[k]; 512 tp = t + k*6; 513 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 514 nz = ai[k+1] - ai[k]; 515 tp = t + (*vj)*6; 516 while (nz--) { 517 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 518 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 519 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 520 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 521 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 522 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 523 vj++; tp = t + (*vj)*6; 524 v += 36; 525 } 526 527 /* xk = inv(Dk)*(Dk*xk) */ 528 d = aa+k*36; /* ptr to inv(Dk) */ 529 tp = t + k*6; 530 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 531 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 532 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 533 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 534 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 535 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 536 } 537 538 /* solve U*x = y by back substitution */ 539 for (k=mbs-1; k>=0; k--){ 540 v = aa + 36*ai[k]; 541 vj = aj + ai[k]; 542 tp = t + k*6; 543 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 544 nz = ai[k+1] - ai[k]; 545 546 tp = t + (*vj)*6; 547 while (nz--) { 548 /* xk += U(k,:)*x(:) */ 549 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 550 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 551 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 552 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 553 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 554 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 555 vj++; tp = t + (*vj)*6; 556 v += 36; 557 } 558 tp = t + k*6; 559 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 560 idx = 6*r[k]; 561 x[idx] = x0; 562 x[idx+1] = x1; 563 x[idx+2] = x2; 564 x[idx+3] = x3; 565 x[idx+4] = x4; 566 x[idx+5] = x5; 567 } 568 569 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 570 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 571 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 572 ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr); 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 578 PetscErrorCode ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 579 { 580 MatScalar *v,*d; 581 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 582 PetscInt nz,*vj,k; 583 584 PetscFunctionBegin; 585 for (k=0; k<mbs; k++){ 586 v = aa + 36*ai[k]; 587 xp = x + k*6; 588 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 589 nz = ai[k+1] - ai[k]; 590 vj = aj + ai[k]; 591 xp = x + (*vj)*6; 592 while (nz--) { 593 /* x(:) += U(k,:)^T*(Dk*xk) */ 594 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 595 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 596 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 597 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 598 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 599 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 600 vj++; xp = x + (*vj)*6; 601 v += 36; 602 } 603 /* xk = inv(Dk)*(Dk*xk) */ 604 d = aa+k*36; /* ptr to inv(Dk) */ 605 xp = x + k*6; 606 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 607 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 608 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 609 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 610 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 611 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 612 } 613 PetscFunctionReturn(0); 614 } 615 #undef __FUNCT__ 616 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 617 PetscErrorCode BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 618 { 619 MatScalar *v; 620 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 621 PetscInt nz,*vj,k; 622 623 PetscFunctionBegin; 624 for (k=mbs-1; k>=0; k--){ 625 v = aa + 36*ai[k]; 626 xp = x + k*6; 627 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 628 nz = ai[k+1] - ai[k]; 629 vj = aj + ai[k]; 630 xp = x + (*vj)*6; 631 while (nz--) { 632 /* xk += U(k,:)*x(:) */ 633 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 634 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 635 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 636 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 637 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 638 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 639 vj++; 640 v += 36; xp = x + (*vj)*6; 641 } 642 xp = x + k*6; 643 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 644 } 645 PetscFunctionReturn(0); 646 } 647 648 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering" 651 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 652 { 653 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 654 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 655 MatScalar *aa=a->a; 656 PetscScalar *x,*b; 657 PetscErrorCode ierr; 658 659 PetscFunctionBegin; 660 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 661 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 662 663 /* solve U^T * D * y = b by forward substitution */ 664 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 665 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 666 667 /* solve U*x = y by back substitution */ 668 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 669 670 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 671 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 672 ierr = PetscLogFlops(36*(2*a->nz + mbs));CHKERRQ(ierr); 673 PetscFunctionReturn(0); 674 } 675 676 #undef __FUNCT__ 677 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering" 678 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 679 { 680 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 681 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 682 MatScalar *aa=a->a; 683 PetscScalar *x,*b; 684 PetscErrorCode ierr; 685 686 PetscFunctionBegin; 687 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 688 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 689 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 690 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 691 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 692 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 693 ierr = PetscLogFlops(36*a->nz + mbs);CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering" 699 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 700 { 701 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 702 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 703 MatScalar *aa=a->a; 704 PetscScalar *x,*b; 705 PetscErrorCode ierr; 706 707 PetscFunctionBegin; 708 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 709 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 710 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 711 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 712 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 713 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 714 ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr); 715 PetscFunctionReturn(0); 716 } 717 718 #undef __FUNCT__ 719 #define __FUNCT__ "MatSolve_SeqSBAIJ_5" 720 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 721 { 722 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 723 IS isrow=a->row; 724 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 725 PetscErrorCode ierr; 726 PetscInt nz,*vj,k,*r,idx; 727 MatScalar *aa=a->a,*v,*diag; 728 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 729 730 PetscFunctionBegin; 731 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 732 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 733 t = a->solve_work; 734 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 735 736 /* solve U^T * D * y = b by forward substitution */ 737 tp = t; 738 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 739 idx = 5*r[k]; 740 tp[0] = b[idx]; 741 tp[1] = b[idx+1]; 742 tp[2] = b[idx+2]; 743 tp[3] = b[idx+3]; 744 tp[4] = b[idx+4]; 745 tp += 5; 746 } 747 748 for (k=0; k<mbs; k++){ 749 v = aa + 25*ai[k]; 750 vj = aj + ai[k]; 751 tp = t + k*5; 752 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 753 nz = ai[k+1] - ai[k]; 754 755 tp = t + (*vj)*5; 756 while (nz--) { 757 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 758 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 759 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 760 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 761 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 762 vj++; tp = t + (*vj)*5; 763 v += 25; 764 } 765 766 /* xk = inv(Dk)*(Dk*xk) */ 767 diag = aa+k*25; /* ptr to inv(Dk) */ 768 tp = t + k*5; 769 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 770 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 771 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 772 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 773 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 774 } 775 776 /* solve U*x = y by back substitution */ 777 for (k=mbs-1; k>=0; k--){ 778 v = aa + 25*ai[k]; 779 vj = aj + ai[k]; 780 tp = t + k*5; 781 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 782 nz = ai[k+1] - ai[k]; 783 784 tp = t + (*vj)*5; 785 while (nz--) { 786 /* xk += U(k,:)*x(:) */ 787 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 788 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 789 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 790 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 791 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 792 vj++; tp = t + (*vj)*5; 793 v += 25; 794 } 795 tp = t + k*5; 796 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 797 idx = 5*r[k]; 798 x[idx] = x0; 799 x[idx+1] = x1; 800 x[idx+2] = x2; 801 x[idx+3] = x3; 802 x[idx+4] = x4; 803 } 804 805 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 806 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 807 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 808 ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr); 809 PetscFunctionReturn(0); 810 } 811 812 #undef __FUNCT__ 813 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 814 PetscErrorCode ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 815 { 816 MatScalar *v,*diag; 817 PetscScalar *xp,x0,x1,x2,x3,x4; 818 PetscInt nz,*vj,k; 819 820 PetscFunctionBegin; 821 for (k=0; k<mbs; k++){ 822 v = aa + 25*ai[k]; 823 xp = x + k*5; 824 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 825 nz = ai[k+1] - ai[k]; 826 vj = aj + ai[k]; 827 xp = x + (*vj)*5; 828 while (nz--) { 829 /* x(:) += U(k,:)^T*(Dk*xk) */ 830 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 831 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 832 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 833 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 834 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 835 vj++; xp = x + (*vj)*5; 836 v += 25; 837 } 838 /* xk = inv(Dk)*(Dk*xk) */ 839 diag = aa+k*25; /* ptr to inv(Dk) */ 840 xp = x + k*5; 841 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 842 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 843 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 844 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 845 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 846 } 847 PetscFunctionReturn(0); 848 } 849 850 #undef __FUNCT__ 851 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 852 PetscErrorCode BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 853 { 854 MatScalar *v; 855 PetscScalar *xp,x0,x1,x2,x3,x4; 856 PetscInt nz,*vj,k; 857 858 PetscFunctionBegin; 859 for (k=mbs-1; k>=0; k--){ 860 v = aa + 25*ai[k]; 861 xp = x + k*5; 862 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 863 nz = ai[k+1] - ai[k]; 864 vj = aj + ai[k]; 865 xp = x + (*vj)*5; 866 while (nz--) { 867 /* xk += U(k,:)*x(:) */ 868 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 869 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 870 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 871 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 872 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 873 vj++; 874 v += 25; xp = x + (*vj)*5; 875 } 876 xp = x + k*5; 877 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 878 } 879 PetscFunctionReturn(0); 880 } 881 882 #undef __FUNCT__ 883 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering" 884 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 885 { 886 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 887 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 888 MatScalar *aa=a->a; 889 PetscScalar *x,*b; 890 PetscErrorCode ierr; 891 892 PetscFunctionBegin; 893 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 894 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 895 896 /* solve U^T * D * y = b by forward substitution */ 897 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 898 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 899 900 /* solve U*x = y by back substitution */ 901 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 902 903 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 904 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 905 ierr = PetscLogFlops(25*(2*a->nz + mbs));CHKERRQ(ierr); 906 PetscFunctionReturn(0); 907 } 908 909 #undef __FUNCT__ 910 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering" 911 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 912 { 913 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 914 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 915 MatScalar *aa=a->a; 916 PetscScalar *x,*b; 917 PetscErrorCode ierr; 918 919 PetscFunctionBegin; 920 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 921 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 922 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 923 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 924 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 925 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 926 ierr = PetscLogFlops(25*(a->nz + mbs));CHKERRQ(ierr); 927 PetscFunctionReturn(0); 928 } 929 930 #undef __FUNCT__ 931 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering" 932 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 933 { 934 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 935 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 936 MatScalar *aa=a->a; 937 PetscScalar *x,*b; 938 PetscErrorCode ierr; 939 940 PetscFunctionBegin; 941 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 942 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 943 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 944 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 945 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 946 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 947 ierr = PetscLogFlops(25*a->nz);CHKERRQ(ierr); 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatSolve_SeqSBAIJ_4" 953 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 954 { 955 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 956 IS isrow=a->row; 957 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 958 PetscErrorCode ierr; 959 PetscInt nz,*vj,k,*r,idx; 960 MatScalar *aa=a->a,*v,*diag; 961 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 962 963 PetscFunctionBegin; 964 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 965 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 966 t = a->solve_work; 967 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 968 969 /* solve U^T * D * y = b by forward substitution */ 970 tp = t; 971 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 972 idx = 4*r[k]; 973 tp[0] = b[idx]; 974 tp[1] = b[idx+1]; 975 tp[2] = b[idx+2]; 976 tp[3] = b[idx+3]; 977 tp += 4; 978 } 979 980 for (k=0; k<mbs; k++){ 981 v = aa + 16*ai[k]; 982 vj = aj + ai[k]; 983 tp = t + k*4; 984 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 985 nz = ai[k+1] - ai[k]; 986 987 tp = t + (*vj)*4; 988 while (nz--) { 989 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 990 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 991 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 992 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 993 vj++; tp = t + (*vj)*4; 994 v += 16; 995 } 996 997 /* xk = inv(Dk)*(Dk*xk) */ 998 diag = aa+k*16; /* ptr to inv(Dk) */ 999 tp = t + k*4; 1000 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1001 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1002 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1003 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1004 } 1005 1006 /* solve U*x = y by back substitution */ 1007 for (k=mbs-1; k>=0; k--){ 1008 v = aa + 16*ai[k]; 1009 vj = aj + ai[k]; 1010 tp = t + k*4; 1011 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 1012 nz = ai[k+1] - ai[k]; 1013 1014 tp = t + (*vj)*4; 1015 while (nz--) { 1016 /* xk += U(k,:)*x(:) */ 1017 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 1018 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 1019 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 1020 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 1021 vj++; tp = t + (*vj)*4; 1022 v += 16; 1023 } 1024 tp = t + k*4; 1025 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 1026 idx = 4*r[k]; 1027 x[idx] = x0; 1028 x[idx+1] = x1; 1029 x[idx+2] = x2; 1030 x[idx+3] = x3; 1031 } 1032 1033 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1034 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1035 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1036 ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr); 1037 PetscFunctionReturn(0); 1038 } 1039 1040 #undef __FUNCT__ 1041 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1042 PetscErrorCode ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1043 { 1044 MatScalar *v,*diag; 1045 PetscScalar *xp,x0,x1,x2,x3; 1046 PetscInt nz,*vj,k; 1047 1048 PetscFunctionBegin; 1049 for (k=0; k<mbs; k++){ 1050 v = aa + 16*ai[k]; 1051 xp = x + k*4; 1052 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 1053 nz = ai[k+1] - ai[k]; 1054 vj = aj + ai[k]; 1055 xp = x + (*vj)*4; 1056 while (nz--) { 1057 /* x(:) += U(k,:)^T*(Dk*xk) */ 1058 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 1059 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1060 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1061 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1062 vj++; xp = x + (*vj)*4; 1063 v += 16; 1064 } 1065 /* xk = inv(Dk)*(Dk*xk) */ 1066 diag = aa+k*16; /* ptr to inv(Dk) */ 1067 xp = x + k*4; 1068 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1069 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1070 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1071 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1072 } 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1078 PetscErrorCode BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1079 { 1080 MatScalar *v; 1081 PetscScalar *xp,x0,x1,x2,x3; 1082 PetscInt nz,*vj,k; 1083 1084 PetscFunctionBegin; 1085 for (k=mbs-1; k>=0; k--){ 1086 v = aa + 16*ai[k]; 1087 xp = x + k*4; 1088 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 1089 nz = ai[k+1] - ai[k]; 1090 vj = aj + ai[k]; 1091 xp = x + (*vj)*4; 1092 while (nz--) { 1093 /* xk += U(k,:)*x(:) */ 1094 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 1095 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 1096 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 1097 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 1098 vj++; 1099 v += 16; xp = x + (*vj)*4; 1100 } 1101 xp = x + k*4; 1102 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 1103 } 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering" 1109 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1110 { 1111 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1112 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1113 MatScalar *aa=a->a; 1114 PetscScalar *x,*b; 1115 PetscErrorCode ierr; 1116 1117 PetscFunctionBegin; 1118 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1119 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1120 1121 /* solve U^T * D * y = b by forward substitution */ 1122 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1123 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1124 1125 /* solve U*x = y by back substitution */ 1126 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1127 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1128 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1129 ierr = PetscLogFlops(16*(2*a->nz + mbs));CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering" 1135 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1136 { 1137 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1138 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1139 MatScalar *aa=a->a; 1140 PetscScalar *x,*b; 1141 PetscErrorCode ierr; 1142 1143 PetscFunctionBegin; 1144 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1145 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1146 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1147 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1148 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1149 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1150 ierr = PetscLogFlops(16*a->nz + mbs);CHKERRQ(ierr); 1151 PetscFunctionReturn(0); 1152 } 1153 1154 #undef __FUNCT__ 1155 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering" 1156 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1157 { 1158 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1159 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1160 MatScalar *aa=a->a; 1161 PetscScalar *x,*b; 1162 PetscErrorCode ierr; 1163 1164 PetscFunctionBegin; 1165 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1166 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1167 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1168 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1169 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1170 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1171 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatSolve_SeqSBAIJ_3" 1177 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 1178 { 1179 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1180 IS isrow=a->row; 1181 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1182 PetscErrorCode ierr; 1183 PetscInt nz,*vj,k,*r,idx; 1184 MatScalar *aa=a->a,*v,*diag; 1185 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 1186 1187 PetscFunctionBegin; 1188 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1189 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1190 t = a->solve_work; 1191 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1192 1193 /* solve U^T * D * y = b by forward substitution */ 1194 tp = t; 1195 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1196 idx = 3*r[k]; 1197 tp[0] = b[idx]; 1198 tp[1] = b[idx+1]; 1199 tp[2] = b[idx+2]; 1200 tp += 3; 1201 } 1202 1203 for (k=0; k<mbs; k++){ 1204 v = aa + 9*ai[k]; 1205 vj = aj + ai[k]; 1206 tp = t + k*3; 1207 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 1208 nz = ai[k+1] - ai[k]; 1209 1210 tp = t + (*vj)*3; 1211 while (nz--) { 1212 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1213 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1214 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1215 vj++; tp = t + (*vj)*3; 1216 v += 9; 1217 } 1218 1219 /* xk = inv(Dk)*(Dk*xk) */ 1220 diag = aa+k*9; /* ptr to inv(Dk) */ 1221 tp = t + k*3; 1222 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1223 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1224 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1225 } 1226 1227 /* solve U*x = y by back substitution */ 1228 for (k=mbs-1; k>=0; k--){ 1229 v = aa + 9*ai[k]; 1230 vj = aj + ai[k]; 1231 tp = t + k*3; 1232 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 1233 nz = ai[k+1] - ai[k]; 1234 1235 tp = t + (*vj)*3; 1236 while (nz--) { 1237 /* xk += U(k,:)*x(:) */ 1238 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 1239 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 1240 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 1241 vj++; tp = t + (*vj)*3; 1242 v += 9; 1243 } 1244 tp = t + k*3; 1245 tp[0] = x0; tp[1] = x1; tp[2] = x2; 1246 idx = 3*r[k]; 1247 x[idx] = x0; 1248 x[idx+1] = x1; 1249 x[idx+2] = x2; 1250 } 1251 1252 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1253 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1254 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1255 ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr); 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1261 PetscErrorCode ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1262 { 1263 MatScalar *v,*diag; 1264 PetscScalar *xp,x0,x1,x2; 1265 PetscInt nz,*vj,k; 1266 1267 PetscFunctionBegin; 1268 for (k=0; k<mbs; k++){ 1269 v = aa + 9*ai[k]; 1270 xp = x + k*3; 1271 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 1272 nz = ai[k+1] - ai[k]; 1273 vj = aj + ai[k]; 1274 xp = x + (*vj)*3; 1275 while (nz--) { 1276 /* x(:) += U(k,:)^T*(Dk*xk) */ 1277 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1278 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1279 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1280 vj++; xp = x + (*vj)*3; 1281 v += 9; 1282 } 1283 /* xk = inv(Dk)*(Dk*xk) */ 1284 diag = aa+k*9; /* ptr to inv(Dk) */ 1285 xp = x + k*3; 1286 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1287 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1288 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1289 } 1290 PetscFunctionReturn(0); 1291 } 1292 1293 #undef __FUNCT__ 1294 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1295 PetscErrorCode BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1296 { 1297 MatScalar *v; 1298 PetscScalar *xp,x0,x1,x2; 1299 PetscInt nz,*vj,k; 1300 1301 PetscFunctionBegin; 1302 for (k=mbs-1; k>=0; k--){ 1303 v = aa + 9*ai[k]; 1304 xp = x + k*3; 1305 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 1306 nz = ai[k+1] - ai[k]; 1307 vj = aj + ai[k]; 1308 xp = x + (*vj)*3; 1309 while (nz--) { 1310 /* xk += U(k,:)*x(:) */ 1311 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 1312 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 1313 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 1314 vj++; 1315 v += 9; xp = x + (*vj)*3; 1316 } 1317 xp = x + k*3; 1318 xp[0] = x0; xp[1] = x1; xp[2] = x2; 1319 } 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering" 1325 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1326 { 1327 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1328 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1329 MatScalar *aa=a->a; 1330 PetscScalar *x,*b; 1331 PetscErrorCode ierr; 1332 1333 PetscFunctionBegin; 1334 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1335 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1336 1337 /* solve U^T * D * y = b by forward substitution */ 1338 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1339 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1340 1341 /* solve U*x = y by back substitution */ 1342 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1343 1344 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1345 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1346 ierr = PetscLogFlops(9*(2*a->nz + mbs));CHKERRQ(ierr); 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering" 1352 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1353 { 1354 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1355 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1356 MatScalar *aa=a->a; 1357 PetscScalar *x,*b; 1358 PetscErrorCode ierr; 1359 1360 PetscFunctionBegin; 1361 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1362 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1363 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1364 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1365 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1366 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1367 ierr = PetscLogFlops(9*(a->nz + mbs));CHKERRQ(ierr); 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNCT__ 1372 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering" 1373 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1374 { 1375 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1376 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1377 MatScalar *aa=a->a; 1378 PetscScalar *x,*b; 1379 PetscErrorCode ierr; 1380 1381 PetscFunctionBegin; 1382 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1383 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1384 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1385 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1386 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1387 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1388 ierr = PetscLogFlops(9*a->nz);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatSolve_SeqSBAIJ_2" 1394 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 1395 { 1396 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 1397 IS isrow=a->row; 1398 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1399 PetscErrorCode ierr; 1400 PetscInt nz,*vj,k,k2,*r,idx; 1401 MatScalar *aa=a->a,*v,*diag; 1402 PetscScalar *x,*b,x0,x1,*t; 1403 1404 PetscFunctionBegin; 1405 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1406 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1407 t = a->solve_work; 1408 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1409 1410 /* solve U^T * D * y = perm(b) by forward substitution */ 1411 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1412 idx = 2*r[k]; 1413 t[k*2] = b[idx]; 1414 t[k*2+1] = b[idx+1]; 1415 } 1416 for (k=0; k<mbs; k++){ 1417 v = aa + 4*ai[k]; 1418 vj = aj + ai[k]; 1419 k2 = k*2; 1420 x0 = t[k2]; x1 = t[k2+1]; 1421 nz = ai[k+1] - ai[k]; 1422 while (nz--) { 1423 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1424 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1425 vj++; v += 4; 1426 } 1427 diag = aa+k*4; /* ptr to inv(Dk) */ 1428 t[k2] = diag[0]*x0 + diag[2]*x1; 1429 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1430 } 1431 1432 /* solve U*x = y by back substitution */ 1433 for (k=mbs-1; k>=0; k--){ 1434 v = aa + 4*ai[k]; 1435 vj = aj + ai[k]; 1436 k2 = k*2; 1437 x0 = t[k2]; x1 = t[k2+1]; 1438 nz = ai[k+1] - ai[k]; 1439 while (nz--) { 1440 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1441 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1442 vj++; v += 4; 1443 } 1444 t[k2] = x0; 1445 t[k2+1] = x1; 1446 idx = 2*r[k]; 1447 x[idx] = x0; 1448 x[idx+1] = x1; 1449 } 1450 1451 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1452 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1453 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1454 ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1460 PetscErrorCode ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1461 { 1462 MatScalar *v,*diag; 1463 PetscScalar x0,x1; 1464 PetscInt nz,*vj,k,k2; 1465 1466 PetscFunctionBegin; 1467 for (k=0; k<mbs; k++){ 1468 v = aa + 4*ai[k]; 1469 vj = aj + ai[k]; 1470 k2 = k*2; 1471 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1472 nz = ai[k+1] - ai[k]; 1473 1474 while (nz--) { 1475 /* x(:) += U(k,:)^T*(Dk*xk) */ 1476 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1477 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1478 vj++; v += 4; 1479 } 1480 /* xk = inv(Dk)*(Dk*xk) */ 1481 diag = aa+k*4; /* ptr to inv(Dk) */ 1482 x[k2] = diag[0]*x0 + diag[2]*x1; 1483 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1484 } 1485 PetscFunctionReturn(0); 1486 } 1487 1488 #undef __FUNCT__ 1489 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1490 PetscErrorCode BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1491 { 1492 MatScalar *v; 1493 PetscScalar x0,x1; 1494 PetscInt nz,*vj,k,k2; 1495 1496 PetscFunctionBegin; 1497 for (k=mbs-1; k>=0; k--){ 1498 v = aa + 4*ai[k]; 1499 vj = aj + ai[k]; 1500 k2 = k*2; 1501 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1502 nz = ai[k+1] - ai[k]; 1503 while (nz--) { 1504 /* xk += U(k,:)*x(:) */ 1505 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1506 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1507 vj++; v += 4; 1508 } 1509 x[k2] = x0; 1510 x[k2+1] = x1; 1511 } 1512 PetscFunctionReturn(0); 1513 } 1514 1515 #undef __FUNCT__ 1516 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering" 1517 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1518 { 1519 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1520 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1521 MatScalar *aa=a->a; 1522 PetscScalar *x,*b; 1523 PetscErrorCode ierr; 1524 1525 PetscFunctionBegin; 1526 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1527 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1528 1529 /* solve U^T * D * y = b by forward substitution */ 1530 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1531 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1532 1533 /* solve U*x = y by back substitution */ 1534 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1535 1536 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1537 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1538 ierr = PetscLogFlops(4*(2*a->nz + mbs));CHKERRQ(ierr); /* bs2*(2*a->nz + mbs) */ 1539 PetscFunctionReturn(0); 1540 } 1541 1542 #undef __FUNCT__ 1543 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering" 1544 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1545 { 1546 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1547 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1548 MatScalar *aa=a->a; 1549 PetscScalar *x,*b; 1550 PetscErrorCode ierr; 1551 1552 PetscFunctionBegin; 1553 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1554 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1555 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1556 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1557 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1558 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1559 ierr = PetscLogFlops(4*(a->nz + mbs));CHKERRQ(ierr); 1560 PetscFunctionReturn(0); 1561 } 1562 1563 #undef __FUNCT__ 1564 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering" 1565 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1566 { 1567 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1568 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1569 MatScalar *aa=a->a; 1570 PetscScalar *x,*b; 1571 PetscErrorCode ierr; 1572 1573 PetscFunctionBegin; 1574 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1575 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1576 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1577 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1578 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1579 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1580 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1586 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1587 { 1588 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1589 IS isrow=a->row; 1590 PetscErrorCode ierr; 1591 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; 1592 MatScalar *aa=a->a,*v; 1593 PetscScalar *x,*b,xk,*t; 1594 PetscInt nz,*vj,k; 1595 1596 PetscFunctionBegin; 1597 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1598 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1599 t = a->solve_work; 1600 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1601 1602 /* solve U^T*D^(1/2)*y = perm(b) by forward substitution */ 1603 for (k=0; k<mbs; k++) t[k] = b[rp[k]]; 1604 for (k=0; k<mbs; k++){ 1605 v = aa + ai[k] + 1; 1606 vj = aj + ai[k] + 1; 1607 xk = t[k]; 1608 nz = ai[k+1] - ai[k] - 1; 1609 while (nz--) t[*vj++] += (*v++) * xk; 1610 t[k] = xk*aa[ai[k]]; /* aa[k] = 1/D(k) */ 1611 } 1612 1613 /* solve U*perm(x) = y by back substitution */ 1614 for (k=mbs-1; k>=0; k--){ 1615 v = aa + ai[k] + 1; 1616 vj = aj + ai[k] + 1; 1617 xk = t[k]; 1618 nz = ai[k+1] - ai[k] - 1; 1619 while (nz--) xk += (*v++) * t[*vj++]; 1620 t[k] = xk; 1621 x[rp[k]] = xk; 1622 } 1623 1624 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1625 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1626 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1627 ierr = PetscLogFlops(4*a->nz + 3*A->rmap.N);CHKERRQ(ierr); 1628 PetscFunctionReturn(0); 1629 } 1630 1631 #undef __FUNCT__ 1632 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1" 1633 PetscErrorCode MatForwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1634 { 1635 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1636 IS isrow=a->row; 1637 PetscErrorCode ierr; 1638 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; 1639 MatScalar *aa=a->a,*v,diagk; 1640 PetscScalar *x,*b,xk; 1641 PetscInt nz,*vj,k; 1642 1643 PetscFunctionBegin; 1644 /* solve U^T*D^(1/2)*y = perm(b) by forward substitution */ 1645 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1646 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1647 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1648 1649 for (k=0; k<mbs; k++) x[k] = b[rp[k]]; 1650 for (k=0; k<mbs; k++){ 1651 v = aa + ai[k] + 1; 1652 vj = aj + ai[k] + 1; 1653 xk = x[k]; 1654 nz = ai[k+1] - ai[k] - 1; 1655 while (nz--) x[*vj++] += (*v++) * xk; 1656 1657 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1658 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1659 x[k] = xk*sqrt(diagk); 1660 } 1661 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1662 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1663 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1664 ierr = PetscLogFlops(2*a->nz + 2*A->rmap.N);CHKERRQ(ierr); 1665 PetscFunctionReturn(0); 1666 } 1667 1668 #undef __FUNCT__ 1669 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1" 1670 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1671 { 1672 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1673 IS isrow=a->row; 1674 PetscErrorCode ierr; 1675 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; 1676 MatScalar *aa=a->a,*v,diagk; 1677 PetscScalar *x,*b,xk,*t; 1678 PetscInt nz,*vj,k; 1679 1680 PetscFunctionBegin; 1681 /* solve D^(1/2)*U*perm(x) = y by back substitution */ 1682 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1683 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1684 t = a->solve_work; 1685 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1686 1687 for (k=0; k<mbs; k++){ 1688 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1689 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1690 t[k] = b[k] * sqrt(diagk); 1691 } 1692 1693 for (k=mbs-1; k>=0; k--){ 1694 v = aa + ai[k] + 1; 1695 vj = aj + ai[k] + 1; 1696 xk = t[k]; 1697 nz = ai[k+1] - ai[k] - 1; 1698 while (nz--) xk += (*v++) * t[*vj++]; 1699 t[k] = xk; 1700 } 1701 for (k=0; k<mbs; k++) x[rp[k]] = t[k]; 1702 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1703 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1704 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1705 ierr = PetscLogFlops(2*a->nz + 2*A->rmap.N);CHKERRQ(ierr); 1706 PetscFunctionReturn(0); 1707 } 1708 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1711 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1712 { 1713 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1714 PetscErrorCode ierr; 1715 1716 PetscFunctionBegin; 1717 if (A->rmap.bs == 1) { 1718 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1719 } else { 1720 IS isrow=a->row; 1721 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,i; 1722 MatScalar *aa=a->a,*v; 1723 PetscScalar *x,*b,*t; 1724 PetscInt nz,*vj,k,n; 1725 if (bb->n > a->solves_work_n) { 1726 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 1727 ierr = PetscMalloc(bb->n*A->rmap.N*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1728 a->solves_work_n = bb->n; 1729 } 1730 n = bb->n; 1731 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1732 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1733 t = a->solves_work; 1734 1735 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1736 1737 /* solve U^T*D*y = perm(b) by forward substitution */ 1738 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs];} /* values are stored interlaced in t */ 1739 for (k=0; k<mbs; k++){ 1740 v = aa + ai[k]; 1741 vj = aj + ai[k]; 1742 nz = ai[k+1] - ai[k]; 1743 while (nz--) { 1744 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1745 v++;vj++; 1746 } 1747 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1748 } 1749 1750 /* solve U*perm(x) = y by back substitution */ 1751 for (k=mbs-1; k>=0; k--){ 1752 v = aa + ai[k]; 1753 vj = aj + ai[k]; 1754 nz = ai[k+1] - ai[k]; 1755 while (nz--) { 1756 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1757 v++;vj++; 1758 } 1759 for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i]; 1760 } 1761 1762 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1763 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1764 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1765 ierr = PetscLogFlops(bb->n*(4*a->nz + A->rmap.N));CHKERRQ(ierr); 1766 } 1767 PetscFunctionReturn(0); 1768 } 1769 1770 /* 1771 Special case where the matrix was ILU(0) factored in the natural 1772 ordering. This eliminates the need for the column and row permutation. 1773 */ 1774 #undef __FUNCT__ 1775 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1776 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1777 { 1778 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1779 PetscErrorCode ierr; 1780 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1781 MatScalar *aa=a->a,*v; 1782 PetscScalar *x,*b,xk; 1783 PetscInt nz,*vj,k; 1784 1785 PetscFunctionBegin; 1786 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1787 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1788 1789 /* solve U^T*D*y = b by forward substitution */ 1790 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1791 for (k=0; k<mbs; k++){ 1792 v = aa + ai[k] + 1; 1793 vj = aj + ai[k] + 1; 1794 xk = x[k]; 1795 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1796 while (nz--) x[*vj++] += (*v++) * xk; 1797 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1798 } 1799 1800 /* solve U*x = y by back substitution */ 1801 for (k=mbs-2; k>=0; k--){ 1802 v = aa + ai[k] + 1; 1803 vj = aj + ai[k] + 1; 1804 xk = x[k]; 1805 nz = ai[k+1] - ai[k] - 1; 1806 while (nz--) xk += (*v++) * x[*vj++]; 1807 x[k] = xk; 1808 } 1809 1810 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1811 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1812 ierr = PetscLogFlops(4*a->nz + A->rmap.N);CHKERRQ(ierr); 1813 PetscFunctionReturn(0); 1814 } 1815 1816 #undef __FUNCT__ 1817 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering" 1818 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1819 { 1820 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1821 PetscErrorCode ierr; 1822 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1823 MatScalar *aa=a->a,*v,diagk; 1824 PetscScalar *x,*b,xk; 1825 PetscInt nz,*vj,k; 1826 1827 PetscFunctionBegin; 1828 /* solve U^T*D^(1/2)*x = b by forward substitution */ 1829 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1830 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1831 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1832 for (k=0; k<mbs; k++){ 1833 v = aa + ai[k] + 1; 1834 vj = aj + ai[k] + 1; 1835 xk = x[k]; 1836 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1837 while (nz--) x[*vj++] += (*v++) * xk; 1838 if (PetscImaginaryPart(aa[ai[k]])) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real"); 1839 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1840 x[k] = xk*sqrt(diagk); 1841 } 1842 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1843 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1844 ierr = PetscLogFlops(2*a->nz + A->rmap.N);CHKERRQ(ierr); 1845 PetscFunctionReturn(0); 1846 } 1847 1848 #undef __FUNCT__ 1849 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering" 1850 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1851 { 1852 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1853 PetscErrorCode ierr; 1854 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1855 MatScalar *aa=a->a,*v,diagk; 1856 PetscScalar *x,*b,xk; 1857 PetscInt nz,*vj,k; 1858 1859 PetscFunctionBegin; 1860 /* solve D^(1/2)*U*x = b by back substitution */ 1861 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1862 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1863 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1864 1865 for (k=0; k<mbs; k++){ 1866 if (PetscImaginaryPart(aa[ai[k]])) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real"); 1867 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1868 x[k] *= sqrt(diagk); 1869 } 1870 1871 for (k=mbs-2; k>=0; k--){ 1872 v = aa + ai[k] + 1; 1873 vj = aj + ai[k] + 1; 1874 xk = x[k]; 1875 nz = ai[k+1] - ai[k] - 1; 1876 while (nz--) xk += (*v++) * x[*vj++]; 1877 x[k] = xk; 1878 } 1879 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1880 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1881 ierr = PetscLogFlops(2*a->nz + A->rmap.N);CHKERRQ(ierr); 1882 PetscFunctionReturn(0); 1883 } 1884 1885 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR" 1888 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B) 1889 { 1890 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 1891 PetscErrorCode ierr; 1892 PetscInt *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j; 1893 PetscInt *jutmp,bs = A->rmap.bs,bs2=a->bs2; 1894 PetscInt m,reallocs = 0,*levtmp; 1895 PetscInt *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 1896 PetscInt incrlev,*lev,shift,prow,nz; 1897 PetscReal f = info->fill,levels = info->levels; 1898 PetscTruth perm_identity; 1899 1900 PetscFunctionBegin; 1901 /* check whether perm is the identity mapping */ 1902 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1903 1904 if (perm_identity){ 1905 a->permute = PETSC_FALSE; 1906 ai = a->i; aj = a->j; 1907 } else { /* non-trivial permutation */ 1908 a->permute = PETSC_TRUE; 1909 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 1910 ai = a->inew; aj = a->jnew; 1911 } 1912 1913 /* initialization */ 1914 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1915 umax = (PetscInt)(f*ai[mbs] + 1); 1916 ierr = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr); 1917 umax += mbs + 1; 1918 shift = mbs + 1; 1919 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 1920 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 1921 iu[0] = mbs + 1; 1922 juidx = mbs + 1; 1923 /* prowl: linked list for pivot row */ 1924 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr); 1925 /* q: linked list for col index */ 1926 q = prowl + mbs; 1927 levtmp = q + mbs; 1928 1929 for (i=0; i<mbs; i++){ 1930 prowl[i] = mbs; 1931 q[i] = 0; 1932 } 1933 1934 /* for each row k */ 1935 for (k=0; k<mbs; k++){ 1936 nzk = 0; 1937 q[k] = mbs; 1938 /* copy current row into linked list */ 1939 nz = ai[rip[k]+1] - ai[rip[k]]; 1940 j = ai[rip[k]]; 1941 while (nz--){ 1942 vj = rip[aj[j++]]; 1943 if (vj > k){ 1944 qm = k; 1945 do { 1946 m = qm; qm = q[m]; 1947 } while(qm < vj); 1948 if (qm == vj) { 1949 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 1950 } 1951 nzk++; 1952 q[m] = vj; 1953 q[vj] = qm; 1954 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1955 } 1956 } 1957 1958 /* modify nonzero structure of k-th row by computing fill-in 1959 for each row prow to be merged in */ 1960 prow = k; 1961 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 1962 1963 while (prow < k){ 1964 /* merge row prow into k-th row */ 1965 jmin = iu[prow] + 1; 1966 jmax = iu[prow+1]; 1967 qm = k; 1968 for (j=jmin; j<jmax; j++){ 1969 incrlev = lev[j-shift] + 1; 1970 if (incrlev > levels) continue; 1971 vj = ju[j]; 1972 do { 1973 m = qm; qm = q[m]; 1974 } while (qm < vj); 1975 if (qm != vj){ /* a fill */ 1976 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1977 levtmp[vj] = incrlev; 1978 } else { 1979 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1980 } 1981 } 1982 prow = prowl[prow]; /* next pivot row */ 1983 } 1984 1985 /* add k to row list for first nonzero element in k-th row */ 1986 if (nzk > 1){ 1987 i = q[k]; /* col value of first nonzero element in k_th row of U */ 1988 prowl[k] = prowl[i]; prowl[i] = k; 1989 } 1990 iu[k+1] = iu[k] + nzk; 1991 1992 /* allocate more space to ju and lev if needed */ 1993 if (iu[k+1] > umax) { 1994 /* estimate how much additional space we will need */ 1995 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1996 /* just double the memory each time */ 1997 maxadd = umax; 1998 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 1999 umax += maxadd; 2000 2001 /* allocate a longer ju */ 2002 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 2003 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 2004 ierr = PetscFree(ju);CHKERRQ(ierr); 2005 ju = jutmp; 2006 2007 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 2008 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr); 2009 ierr = PetscFree(lev);CHKERRQ(ierr); 2010 lev = jutmp; 2011 reallocs += 2; /* count how many times we realloc */ 2012 } 2013 2014 /* save nonzero structure of k-th row in ju */ 2015 i=k; 2016 while (nzk --) { 2017 i = q[i]; 2018 ju[juidx] = i; 2019 lev[juidx-shift] = levtmp[i]; 2020 juidx++; 2021 } 2022 } 2023 2024 #if defined(PETSC_USE_INFO) 2025 if (ai[mbs] != 0) { 2026 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2027 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2028 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2029 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 2030 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2031 } else { 2032 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2033 } 2034 #endif 2035 2036 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2037 ierr = PetscFree(prowl);CHKERRQ(ierr); 2038 ierr = PetscFree(lev);CHKERRQ(ierr); 2039 2040 /* put together the new matrix */ 2041 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 2042 ierr = MatSetSizes(*B,bs*mbs,bs*mbs,bs*mbs,bs*mbs);CHKERRQ(ierr); 2043 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 2044 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 2045 2046 /* ierr = PetscLogObjectParent(*B,iperm);CHKERRQ(ierr); */ 2047 b = (Mat_SeqSBAIJ*)(*B)->data; 2048 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2049 b->singlemalloc = PETSC_FALSE; 2050 b->free_a = PETSC_TRUE; 2051 b->free_ij = PETSC_TRUE; 2052 /* the next line frees the default space generated by the Create() */ 2053 ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr); 2054 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 2055 b->j = ju; 2056 b->i = iu; 2057 b->diag = 0; 2058 b->ilen = 0; 2059 b->imax = 0; 2060 2061 if (b->row) { 2062 ierr = ISDestroy(b->row);CHKERRQ(ierr); 2063 } 2064 if (b->icol) { 2065 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 2066 } 2067 b->row = perm; 2068 b->icol = perm; 2069 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2070 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2071 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2072 /* In b structure: Free imax, ilen, old a, old j. 2073 Allocate idnew, solve_work, new a, new j */ 2074 ierr = PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2075 b->maxnz = b->nz = iu[mbs]; 2076 2077 (*B)->factor = FACTOR_CHOLESKY; 2078 (*B)->info.factor_mallocs = reallocs; 2079 (*B)->info.fill_ratio_given = f; 2080 if (ai[mbs] != 0) { 2081 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2082 } else { 2083 (*B)->info.fill_ratio_needed = 0.0; 2084 } 2085 2086 if (perm_identity){ 2087 switch (bs) { 2088 case 1: 2089 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 2090 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2091 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2092 (*B)->ops->solves = MatSolves_SeqSBAIJ_1; 2093 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");CHKERRQ(ierr); 2094 break; 2095 case 2: 2096 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 2097 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 2098 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 2099 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 2100 break; 2101 case 3: 2102 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 2103 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 2104 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 2105 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 2106 break; 2107 case 4: 2108 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 2109 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 2110 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 2111 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 2112 break; 2113 case 5: 2114 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 2115 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 2116 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 2117 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 2118 break; 2119 case 6: 2120 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 2121 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 2122 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 2123 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 2124 break; 2125 case 7: 2126 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 2127 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 2128 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 2129 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 2130 break; 2131 default: 2132 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 2133 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 2134 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 2135 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");CHKERRQ(ierr); 2136 break; 2137 } 2138 } 2139 2140 PetscFunctionReturn(0); 2141 } 2142 2143 #include "petscbt.h" 2144 #include "src/mat/utils/freespace.h" 2145 #undef __FUNCT__ 2146 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 2147 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 2148 { 2149 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2150 Mat_SeqSBAIJ *b; 2151 Mat B; 2152 PetscErrorCode ierr; 2153 PetscTruth perm_identity,free_ij = PETSC_TRUE; 2154 PetscInt bs=A->rmap.bs,am=a->mbs; 2155 PetscInt reallocs=0,*rip,i,*ai,*aj,*ui; 2156 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2157 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 2158 PetscReal fill=info->fill,levels=info->levels,ratio_needed; 2159 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2160 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2161 PetscBT lnkbt; 2162 PetscScalar *ua; 2163 2164 PetscFunctionBegin; 2165 /* 2166 This code originally uses Modified Sparse Row (MSR) storage 2167 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice! 2168 Then it is rewritten so the factor B takes seqsbaij format. However the associated 2169 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1, 2170 thus the original code in MSR format is still used for these cases. 2171 The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever 2172 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 2173 */ 2174 if (bs > 1){ 2175 ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 /* check whether perm is the identity mapping */ 2180 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2181 2182 /* special case that simply copies fill pattern */ 2183 if (!levels && perm_identity) { 2184 a->permute = PETSC_FALSE; 2185 /* reuse the column pointers and row offsets for memory savings */ 2186 ui = a->i; 2187 uj = a->j; 2188 free_ij = PETSC_FALSE; 2189 ratio_needed = 1.0; 2190 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2191 if (perm_identity){ 2192 a->permute = PETSC_FALSE; 2193 ai = a->i; aj = a->j; 2194 } else { /* non-trivial permutation */ 2195 a->permute = PETSC_TRUE; 2196 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 2197 ai = a->inew; aj = a->jnew; 2198 } 2199 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2200 2201 /* initialization */ 2202 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2203 ui[0] = 0; 2204 2205 /* jl: linked list for storing indices of the pivot rows 2206 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2207 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 2208 il = jl + am; 2209 uj_ptr = (PetscInt**)(il + am); 2210 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 2211 for (i=0; i<am; i++){ 2212 jl[i] = am; il[i] = 0; 2213 } 2214 2215 /* create and initialize a linked list for storing column indices of the active row k */ 2216 nlnk = am + 1; 2217 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2218 2219 /* initial FreeSpace size is fill*(ai[am]+1) */ 2220 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2221 current_space = free_space; 2222 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2223 current_space_lvl = free_space_lvl; 2224 2225 for (k=0; k<am; k++){ /* for each active row k */ 2226 /* initialize lnk by the column indices of row rip[k] */ 2227 nzk = 0; 2228 ncols = ai[rip[k]+1] - ai[rip[k]]; 2229 cols = aj+ai[rip[k]]; 2230 ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2231 nzk += nlnk; 2232 2233 /* update lnk by computing fill-in for each pivot row to be merged in */ 2234 prow = jl[k]; /* 1st pivot row */ 2235 2236 while (prow < k){ 2237 nextprow = jl[prow]; 2238 2239 /* merge prow into k-th row */ 2240 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2241 jmax = ui[prow+1]; 2242 ncols = jmax-jmin; 2243 i = jmin - ui[prow]; 2244 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2245 j = *(uj_lvl_ptr[prow] + i - 1); 2246 cols_lvl = uj_lvl_ptr[prow]+i; 2247 ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2248 nzk += nlnk; 2249 2250 /* update il and jl for prow */ 2251 if (jmin < jmax){ 2252 il[prow] = jmin; 2253 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2254 } 2255 prow = nextprow; 2256 } 2257 2258 /* if free space is not available, make more free space */ 2259 if (current_space->local_remaining<nzk) { 2260 i = am - k + 1; /* num of unfactored rows */ 2261 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2262 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2263 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2264 reallocs++; 2265 } 2266 2267 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2268 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2269 2270 /* add the k-th row into il and jl */ 2271 if (nzk-1 > 0){ 2272 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2273 jl[k] = jl[i]; jl[i] = k; 2274 il[k] = ui[k] + 1; 2275 } 2276 uj_ptr[k] = current_space->array; 2277 uj_lvl_ptr[k] = current_space_lvl->array; 2278 2279 current_space->array += nzk; 2280 current_space->local_used += nzk; 2281 current_space->local_remaining -= nzk; 2282 current_space_lvl->array += nzk; 2283 current_space_lvl->local_used += nzk; 2284 current_space_lvl->local_remaining -= nzk; 2285 2286 ui[k+1] = ui[k] + nzk; 2287 } 2288 2289 #if defined(PETSC_USE_INFO) 2290 if (ai[am] != 0) { 2291 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2292 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2293 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2294 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2295 } else { 2296 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2297 } 2298 #endif 2299 2300 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2301 ierr = PetscFree(jl);CHKERRQ(ierr); 2302 2303 /* destroy list of free space and other temporary array(s) */ 2304 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2305 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2306 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2307 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2308 if (ai[am] != 0) { 2309 ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2310 } else { 2311 ratio_needed = 0.0; 2312 } 2313 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2314 2315 /* put together the new matrix in MATSEQSBAIJ format */ 2316 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&ua);CHKERRQ(ierr); 2317 ierr = MatCreateSeqSBAIJWithArrays(PETSC_COMM_SELF,1,am,am,ui,uj,ua,fact);CHKERRQ(ierr); 2318 B = *fact; 2319 b = (Mat_SeqSBAIJ*)B->data; 2320 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2321 b->singlemalloc = PETSC_FALSE; 2322 b->free_a = PETSC_TRUE; 2323 b->free_ij = free_ij; 2324 b->diag = 0; 2325 b->ilen = 0; 2326 b->imax = 0; 2327 b->row = perm; 2328 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2329 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2330 b->icol = perm; 2331 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2332 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2333 b->maxnz = b->nz = ui[am]; 2334 2335 B->factor = FACTOR_CHOLESKY; 2336 B->info.factor_mallocs = reallocs; 2337 B->info.fill_ratio_given = fill; 2338 B->info.fill_ratio_needed = ratio_needed; 2339 2340 if (perm_identity){ 2341 switch (bs) { 2342 case 1: 2343 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 2344 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2345 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2346 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");CHKERRQ(ierr); 2347 break; 2348 case 2: 2349 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 2350 B->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 2351 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 2352 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 2353 break; 2354 case 3: 2355 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 2356 B->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 2357 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 2358 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 2359 break; 2360 case 4: 2361 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 2362 B->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 2363 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 2364 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 2365 break; 2366 case 5: 2367 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 2368 B->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 2369 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 2370 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 2371 break; 2372 case 6: 2373 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 2374 B->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 2375 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 2376 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 2377 break; 2378 case 7: 2379 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 2380 B->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 2381 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 2382 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 2383 break; 2384 default: 2385 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 2386 B->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 2387 B->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 2388 ierr = PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");CHKERRQ(ierr); 2389 break; 2390 } 2391 } 2392 PetscFunctionReturn(0); 2393 } 2394 2395