1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for SBAIJ format. 5 */ 6 7 #include "../src/mat/impls/sbaij/seq/sbaij.h" 8 #include "../src/mat/impls/baij/seq/baij.h" 9 #include "../src/mat/blockinvert.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSolve_SeqSBAIJ_N" 13 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 14 { 15 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 16 IS isrow=a->row; 17 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 18 PetscErrorCode ierr; 19 const PetscInt *r; 20 PetscInt nz,*vj,k,idx,k1; 21 PetscInt bs=A->rmap->bs,bs2 = a->bs2; 22 MatScalar *aa=a->a,*v,*diag; 23 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 24 25 PetscFunctionBegin; 26 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 27 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 28 t = a->solve_work; 29 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 30 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 31 32 /* solve U^T * D * y = b by forward substitution */ 33 xk = t; 34 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 35 idx = bs*r[k]; 36 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 37 } 38 for (k=0; k<mbs; k++){ 39 v = aa + bs2*ai[k]; 40 xk = t + k*bs; /* Dk*xk = k-th block of x */ 41 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 42 nz = ai[k+1] - ai[k]; 43 vj = aj + ai[k]; 44 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 45 while (nz--) { 46 /* x(:) += U(k,:)^T*(Dk*xk) */ 47 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 48 vj++; xj = t + (*vj)*bs; 49 v += bs2; 50 } 51 /* xk = inv(Dk)*(Dk*xk) */ 52 diag = aa+k*bs2; /* ptr to inv(Dk) */ 53 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 54 } 55 56 /* solve U*x = y by back substitution */ 57 for (k=mbs-1; k>=0; k--){ 58 v = aa + bs2*ai[k]; 59 xk = t + k*bs; /* xk */ 60 nz = ai[k+1] - ai[k]; 61 vj = aj + ai[k]; 62 xj = t + (*vj)*bs; 63 while (nz--) { 64 /* xk += U(k,:)*x(:) */ 65 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 66 vj++; 67 v += bs2; xj = t + (*vj)*bs; 68 } 69 idx = bs*r[k]; 70 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 71 } 72 73 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 74 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 75 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 76 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 77 ierr = PetscLogFlops(bs2*(2.0*a->nz + mbs));CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N" 83 PetscErrorCode MatForwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 84 { 85 PetscFunctionBegin; 86 SETERRQ(1,"not implemented yet"); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N" 92 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 93 { 94 PetscFunctionBegin; 95 SETERRQ(1,"not implemented yet"); 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 101 PetscErrorCode ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 102 { 103 PetscErrorCode ierr; 104 PetscInt nz,*vj,k; 105 PetscInt bs2 = bs*bs; 106 MatScalar *v,*diag; 107 PetscScalar *xk,*xj,*xk_tmp; 108 109 PetscFunctionBegin; 110 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 111 for (k=0; k<mbs; k++){ 112 v = aa + bs2*ai[k]; 113 xk = x + k*bs; /* Dk*xk = k-th block of x */ 114 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 115 nz = ai[k+1] - ai[k]; 116 vj = aj + ai[k]; 117 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 118 while (nz--) { 119 /* x(:) += U(k,:)^T*(Dk*xk) */ 120 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 121 vj++; xj = x + (*vj)*bs; 122 v += bs2; 123 } 124 /* xk = inv(Dk)*(Dk*xk) */ 125 diag = aa+k*bs2; /* ptr to inv(Dk) */ 126 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 127 } 128 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 134 PetscErrorCode BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 135 { 136 PetscInt nz,*vj,k; 137 PetscInt bs2 = bs*bs; 138 MatScalar *v; 139 PetscScalar *xk,*xj; 140 141 PetscFunctionBegin; 142 for (k=mbs-1; k>=0; k--){ 143 v = aa + bs2*ai[k]; 144 xk = x + k*bs; /* xk */ 145 nz = ai[k+1] - ai[k]; 146 vj = aj + ai[k]; 147 xj = x + (*vj)*bs; 148 while (nz--) { 149 /* xk += U(k,:)*x(:) */ 150 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 151 vj++; 152 v += bs2; xj = x + (*vj)*bs; 153 } 154 } 155 PetscFunctionReturn(0); 156 } 157 158 #undef __FUNCT__ 159 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering" 160 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 161 { 162 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 163 PetscErrorCode ierr; 164 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 165 PetscInt bs=A->rmap->bs; 166 MatScalar *aa=a->a; 167 PetscScalar *x,*b; 168 #if defined(PETSC_USE_LOG) 169 PetscInt bs2 = a->bs2; 170 #endif 171 172 PetscFunctionBegin; 173 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 175 176 /* solve U^T * D * y = b by forward substitution */ 177 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 178 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 179 180 /* solve U*x = y by back substitution */ 181 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 182 183 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 184 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 185 ierr = PetscLogFlops(bs2*(2.0*a->nz + mbs));CHKERRQ(ierr); 186 PetscFunctionReturn(0); 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering" 191 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 192 { 193 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 194 PetscErrorCode ierr; 195 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 196 PetscInt bs=A->rmap->bs; 197 MatScalar *aa=a->a; 198 PetscScalar *x,*b; 199 #if defined(PETSC_USE_LOG) 200 PetscInt bs2 = a->bs2; 201 #endif 202 203 PetscFunctionBegin; 204 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 205 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 206 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 207 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 208 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 209 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 210 ierr = PetscLogFlops(bs2*a->nz + A->rmap->N);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering" 216 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 217 { 218 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 219 PetscErrorCode ierr; 220 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 221 PetscInt bs=A->rmap->bs; 222 MatScalar *aa=a->a; 223 PetscScalar *x,*b; 224 #if defined(PETSC_USE_LOG) 225 PetscInt bs2 = a->bs2; 226 #endif 227 228 PetscFunctionBegin; 229 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 230 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 231 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 232 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 233 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 234 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 235 ierr = PetscLogFlops(bs2*a->nz);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatSolve_SeqSBAIJ_7" 241 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 242 { 243 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 244 IS isrow=a->row; 245 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 246 PetscErrorCode ierr; 247 const PetscInt *r; 248 PetscInt nz,*vj,k,idx; 249 MatScalar *aa=a->a,*v,*d; 250 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 251 252 PetscFunctionBegin; 253 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 254 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 255 t = a->solve_work; 256 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 257 258 /* solve U^T * D * y = b by forward substitution */ 259 tp = t; 260 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 261 idx = 7*r[k]; 262 tp[0] = b[idx]; 263 tp[1] = b[idx+1]; 264 tp[2] = b[idx+2]; 265 tp[3] = b[idx+3]; 266 tp[4] = b[idx+4]; 267 tp[5] = b[idx+5]; 268 tp[6] = b[idx+6]; 269 tp += 7; 270 } 271 272 for (k=0; k<mbs; k++){ 273 v = aa + 49*ai[k]; 274 vj = aj + ai[k]; 275 tp = t + k*7; 276 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 277 nz = ai[k+1] - ai[k]; 278 tp = t + (*vj)*7; 279 while (nz--) { 280 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 281 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 282 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 283 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 284 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 285 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 286 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 287 vj++; tp = t + (*vj)*7; 288 v += 49; 289 } 290 291 /* xk = inv(Dk)*(Dk*xk) */ 292 d = aa+k*49; /* ptr to inv(Dk) */ 293 tp = t + k*7; 294 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 295 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 296 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 297 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 298 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 299 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 300 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 301 } 302 303 /* solve U*x = y by back substitution */ 304 for (k=mbs-1; k>=0; k--){ 305 v = aa + 49*ai[k]; 306 vj = aj + ai[k]; 307 tp = t + k*7; 308 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 309 nz = ai[k+1] - ai[k]; 310 311 tp = t + (*vj)*7; 312 while (nz--) { 313 /* xk += U(k,:)*x(:) */ 314 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 315 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 316 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 317 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 318 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 319 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 320 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 321 vj++; tp = t + (*vj)*7; 322 v += 49; 323 } 324 tp = t + k*7; 325 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 326 idx = 7*r[k]; 327 x[idx] = x0; 328 x[idx+1] = x1; 329 x[idx+2] = x2; 330 x[idx+3] = x3; 331 x[idx+4] = x4; 332 x[idx+5] = x5; 333 x[idx+6] = x6; 334 } 335 336 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 337 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 338 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 339 ierr = PetscLogFlops(49.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 340 PetscFunctionReturn(0); 341 } 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 345 PetscErrorCode ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 346 { 347 MatScalar *v,*d; 348 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 349 PetscInt nz,*vj,k; 350 351 PetscFunctionBegin; 352 for (k=0; k<mbs; k++){ 353 v = aa + 49*ai[k]; 354 xp = x + k*7; 355 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 356 nz = ai[k+1] - ai[k]; 357 vj = aj + ai[k]; 358 xp = x + (*vj)*7; 359 while (nz--) { 360 /* x(:) += U(k,:)^T*(Dk*xk) */ 361 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 362 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 363 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 364 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 365 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 366 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 367 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 368 vj++; xp = x + (*vj)*7; 369 v += 49; 370 } 371 /* xk = inv(Dk)*(Dk*xk) */ 372 d = aa+k*49; /* ptr to inv(Dk) */ 373 xp = x + k*7; 374 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 375 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 376 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 377 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 378 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 379 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 380 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 381 } 382 PetscFunctionReturn(0); 383 } 384 385 #undef __FUNCT__ 386 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 387 PetscErrorCode BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 388 { 389 MatScalar *v; 390 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 391 PetscInt nz,*vj,k; 392 393 PetscFunctionBegin; 394 for (k=mbs-1; k>=0; k--){ 395 v = aa + 49*ai[k]; 396 xp = x + k*7; 397 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 398 nz = ai[k+1] - ai[k]; 399 vj = aj + ai[k]; 400 xp = x + (*vj)*7; 401 while (nz--) { 402 /* xk += U(k,:)*x(:) */ 403 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 404 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 405 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 406 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 407 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 408 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 409 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 410 vj++; 411 v += 49; xp = x + (*vj)*7; 412 } 413 xp = x + k*7; 414 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 415 } 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering" 421 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 422 { 423 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 424 PetscErrorCode ierr; 425 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 426 MatScalar *aa=a->a; 427 PetscScalar *x,*b; 428 429 PetscFunctionBegin; 430 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 431 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 432 433 /* solve U^T * D * y = b by forward substitution */ 434 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 435 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 436 437 /* solve U*x = y by back substitution */ 438 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 439 440 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 441 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 442 ierr = PetscLogFlops(49.0*(2*a->nz + mbs));CHKERRQ(ierr); 443 PetscFunctionReturn(0); 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering" 448 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 449 { 450 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 451 PetscErrorCode ierr; 452 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 453 MatScalar *aa=a->a; 454 PetscScalar *x,*b; 455 456 PetscFunctionBegin; 457 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 458 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 459 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 460 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 461 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 462 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 463 ierr = PetscLogFlops(49.0*a->nz + mbs);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 #undef __FUNCT__ 468 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering" 469 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 470 { 471 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 472 PetscErrorCode ierr; 473 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 474 MatScalar *aa=a->a; 475 PetscScalar *x,*b; 476 477 PetscFunctionBegin; 478 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 479 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 480 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 481 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 482 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 483 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 484 ierr = PetscLogFlops(49.0*a->nz);CHKERRQ(ierr); 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNCT__ 489 #define __FUNCT__ "MatSolve_SeqSBAIJ_6" 490 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 491 { 492 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 493 IS isrow=a->row; 494 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 495 PetscErrorCode ierr; 496 const PetscInt *r; 497 PetscInt nz,*vj,k,idx; 498 MatScalar *aa=a->a,*v,*d; 499 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 500 501 PetscFunctionBegin; 502 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 503 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 504 t = a->solve_work; 505 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 506 507 /* solve U^T * D * y = b by forward substitution */ 508 tp = t; 509 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 510 idx = 6*r[k]; 511 tp[0] = b[idx]; 512 tp[1] = b[idx+1]; 513 tp[2] = b[idx+2]; 514 tp[3] = b[idx+3]; 515 tp[4] = b[idx+4]; 516 tp[5] = b[idx+5]; 517 tp += 6; 518 } 519 520 for (k=0; k<mbs; k++){ 521 v = aa + 36*ai[k]; 522 vj = aj + ai[k]; 523 tp = t + k*6; 524 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 525 nz = ai[k+1] - ai[k]; 526 tp = t + (*vj)*6; 527 while (nz--) { 528 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 529 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 530 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 531 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 532 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 533 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 534 vj++; tp = t + (*vj)*6; 535 v += 36; 536 } 537 538 /* xk = inv(Dk)*(Dk*xk) */ 539 d = aa+k*36; /* ptr to inv(Dk) */ 540 tp = t + k*6; 541 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 542 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 543 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 544 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 545 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 546 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 547 } 548 549 /* solve U*x = y by back substitution */ 550 for (k=mbs-1; k>=0; k--){ 551 v = aa + 36*ai[k]; 552 vj = aj + ai[k]; 553 tp = t + k*6; 554 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 555 nz = ai[k+1] - ai[k]; 556 557 tp = t + (*vj)*6; 558 while (nz--) { 559 /* xk += U(k,:)*x(:) */ 560 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 561 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 562 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 563 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 564 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 565 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 566 vj++; tp = t + (*vj)*6; 567 v += 36; 568 } 569 tp = t + k*6; 570 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 571 idx = 6*r[k]; 572 x[idx] = x0; 573 x[idx+1] = x1; 574 x[idx+2] = x2; 575 x[idx+3] = x3; 576 x[idx+4] = x4; 577 x[idx+5] = x5; 578 } 579 580 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 581 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 582 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 583 ierr = PetscLogFlops(36.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 584 PetscFunctionReturn(0); 585 } 586 587 #undef __FUNCT__ 588 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 589 PetscErrorCode ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 590 { 591 MatScalar *v,*d; 592 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 593 PetscInt nz,*vj,k; 594 595 PetscFunctionBegin; 596 for (k=0; k<mbs; k++){ 597 v = aa + 36*ai[k]; 598 xp = x + k*6; 599 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 600 nz = ai[k+1] - ai[k]; 601 vj = aj + ai[k]; 602 xp = x + (*vj)*6; 603 while (nz--) { 604 /* x(:) += U(k,:)^T*(Dk*xk) */ 605 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 606 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 607 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 608 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 609 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 610 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 611 vj++; xp = x + (*vj)*6; 612 v += 36; 613 } 614 /* xk = inv(Dk)*(Dk*xk) */ 615 d = aa+k*36; /* ptr to inv(Dk) */ 616 xp = x + k*6; 617 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 618 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 619 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 620 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 621 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 622 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 623 } 624 PetscFunctionReturn(0); 625 } 626 #undef __FUNCT__ 627 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 628 PetscErrorCode BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 629 { 630 MatScalar *v; 631 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 632 PetscInt nz,*vj,k; 633 634 PetscFunctionBegin; 635 for (k=mbs-1; k>=0; k--){ 636 v = aa + 36*ai[k]; 637 xp = x + k*6; 638 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 639 nz = ai[k+1] - ai[k]; 640 vj = aj + ai[k]; 641 xp = x + (*vj)*6; 642 while (nz--) { 643 /* xk += U(k,:)*x(:) */ 644 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 645 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 646 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 647 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 648 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 649 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 650 vj++; 651 v += 36; xp = x + (*vj)*6; 652 } 653 xp = x + k*6; 654 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 655 } 656 PetscFunctionReturn(0); 657 } 658 659 660 #undef __FUNCT__ 661 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering" 662 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 663 { 664 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 665 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 666 MatScalar *aa=a->a; 667 PetscScalar *x,*b; 668 PetscErrorCode ierr; 669 670 PetscFunctionBegin; 671 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 672 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 673 674 /* solve U^T * D * y = b by forward substitution */ 675 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 676 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 677 678 /* solve U*x = y by back substitution */ 679 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 680 681 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 682 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 683 ierr = PetscLogFlops(36.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 684 PetscFunctionReturn(0); 685 } 686 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering" 689 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 690 { 691 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 692 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 693 MatScalar *aa=a->a; 694 PetscScalar *x,*b; 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 699 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 700 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 701 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 702 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 703 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 704 ierr = PetscLogFlops(36.0*a->nz + mbs);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering" 710 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 711 { 712 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 713 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 714 MatScalar *aa=a->a; 715 PetscScalar *x,*b; 716 PetscErrorCode ierr; 717 718 PetscFunctionBegin; 719 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 720 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 721 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 722 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 723 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 724 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 725 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatSolve_SeqSBAIJ_5" 731 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 732 { 733 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 734 IS isrow=a->row; 735 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 736 PetscErrorCode ierr; 737 const PetscInt *r; 738 PetscInt nz,*vj,k,idx; 739 MatScalar *aa=a->a,*v,*diag; 740 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 741 742 PetscFunctionBegin; 743 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 744 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 745 t = a->solve_work; 746 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 747 748 /* solve U^T * D * y = b by forward substitution */ 749 tp = t; 750 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 751 idx = 5*r[k]; 752 tp[0] = b[idx]; 753 tp[1] = b[idx+1]; 754 tp[2] = b[idx+2]; 755 tp[3] = b[idx+3]; 756 tp[4] = b[idx+4]; 757 tp += 5; 758 } 759 760 for (k=0; k<mbs; k++){ 761 v = aa + 25*ai[k]; 762 vj = aj + ai[k]; 763 tp = t + k*5; 764 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 765 nz = ai[k+1] - ai[k]; 766 767 tp = t + (*vj)*5; 768 while (nz--) { 769 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 770 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 771 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 772 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 773 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 774 vj++; tp = t + (*vj)*5; 775 v += 25; 776 } 777 778 /* xk = inv(Dk)*(Dk*xk) */ 779 diag = aa+k*25; /* ptr to inv(Dk) */ 780 tp = t + k*5; 781 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 782 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 783 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 784 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 785 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 786 } 787 788 /* solve U*x = y by back substitution */ 789 for (k=mbs-1; k>=0; k--){ 790 v = aa + 25*ai[k]; 791 vj = aj + ai[k]; 792 tp = t + k*5; 793 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 794 nz = ai[k+1] - ai[k]; 795 796 tp = t + (*vj)*5; 797 while (nz--) { 798 /* xk += U(k,:)*x(:) */ 799 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 800 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 801 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 802 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 803 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 804 vj++; tp = t + (*vj)*5; 805 v += 25; 806 } 807 tp = t + k*5; 808 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 809 idx = 5*r[k]; 810 x[idx] = x0; 811 x[idx+1] = x1; 812 x[idx+2] = x2; 813 x[idx+3] = x3; 814 x[idx+4] = x4; 815 } 816 817 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 818 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 819 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 820 ierr = PetscLogFlops(25.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 826 PetscErrorCode ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 827 { 828 MatScalar *v,*diag; 829 PetscScalar *xp,x0,x1,x2,x3,x4; 830 PetscInt nz,*vj,k; 831 832 PetscFunctionBegin; 833 for (k=0; k<mbs; k++){ 834 v = aa + 25*ai[k]; 835 xp = x + k*5; 836 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 837 nz = ai[k+1] - ai[k]; 838 vj = aj + ai[k]; 839 xp = x + (*vj)*5; 840 while (nz--) { 841 /* x(:) += U(k,:)^T*(Dk*xk) */ 842 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 843 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 844 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 845 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 846 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 847 vj++; xp = x + (*vj)*5; 848 v += 25; 849 } 850 /* xk = inv(Dk)*(Dk*xk) */ 851 diag = aa+k*25; /* ptr to inv(Dk) */ 852 xp = x + k*5; 853 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 854 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 855 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 856 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 857 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 858 } 859 PetscFunctionReturn(0); 860 } 861 862 #undef __FUNCT__ 863 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 864 PetscErrorCode BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 865 { 866 MatScalar *v; 867 PetscScalar *xp,x0,x1,x2,x3,x4; 868 PetscInt nz,*vj,k; 869 870 PetscFunctionBegin; 871 for (k=mbs-1; k>=0; k--){ 872 v = aa + 25*ai[k]; 873 xp = x + k*5; 874 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 875 nz = ai[k+1] - ai[k]; 876 vj = aj + ai[k]; 877 xp = x + (*vj)*5; 878 while (nz--) { 879 /* xk += U(k,:)*x(:) */ 880 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 881 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 882 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 883 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 884 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 885 vj++; 886 v += 25; xp = x + (*vj)*5; 887 } 888 xp = x + k*5; 889 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 890 } 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering" 896 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 897 { 898 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 899 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 900 MatScalar *aa=a->a; 901 PetscScalar *x,*b; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 906 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 907 908 /* solve U^T * D * y = b by forward substitution */ 909 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 910 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 911 912 /* solve U*x = y by back substitution */ 913 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 914 915 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 916 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 917 ierr = PetscLogFlops(25.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 918 PetscFunctionReturn(0); 919 } 920 921 #undef __FUNCT__ 922 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering" 923 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 924 { 925 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 926 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 927 MatScalar *aa=a->a; 928 PetscScalar *x,*b; 929 PetscErrorCode ierr; 930 931 PetscFunctionBegin; 932 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 933 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 934 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 935 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 936 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 937 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 938 ierr = PetscLogFlops(25.0*(a->nz + mbs));CHKERRQ(ierr); 939 PetscFunctionReturn(0); 940 } 941 942 #undef __FUNCT__ 943 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering" 944 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 945 { 946 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 947 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 948 MatScalar *aa=a->a; 949 PetscScalar *x,*b; 950 PetscErrorCode ierr; 951 952 PetscFunctionBegin; 953 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 954 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 955 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 956 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 957 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 958 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 959 ierr = PetscLogFlops(25.0*a->nz);CHKERRQ(ierr); 960 PetscFunctionReturn(0); 961 } 962 963 #undef __FUNCT__ 964 #define __FUNCT__ "MatSolve_SeqSBAIJ_4" 965 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 966 { 967 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 968 IS isrow=a->row; 969 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 970 PetscErrorCode ierr; 971 const PetscInt *r; 972 PetscInt nz,*vj,k,idx; 973 MatScalar *aa=a->a,*v,*diag; 974 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 975 976 PetscFunctionBegin; 977 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 978 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 979 t = a->solve_work; 980 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 981 982 /* solve U^T * D * y = b by forward substitution */ 983 tp = t; 984 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 985 idx = 4*r[k]; 986 tp[0] = b[idx]; 987 tp[1] = b[idx+1]; 988 tp[2] = b[idx+2]; 989 tp[3] = b[idx+3]; 990 tp += 4; 991 } 992 993 for (k=0; k<mbs; k++){ 994 v = aa + 16*ai[k]; 995 vj = aj + ai[k]; 996 tp = t + k*4; 997 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 998 nz = ai[k+1] - ai[k]; 999 1000 tp = t + (*vj)*4; 1001 while (nz--) { 1002 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 1003 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1004 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1005 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1006 vj++; tp = t + (*vj)*4; 1007 v += 16; 1008 } 1009 1010 /* xk = inv(Dk)*(Dk*xk) */ 1011 diag = aa+k*16; /* ptr to inv(Dk) */ 1012 tp = t + k*4; 1013 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1014 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1015 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1016 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1017 } 1018 1019 /* solve U*x = y by back substitution */ 1020 for (k=mbs-1; k>=0; k--){ 1021 v = aa + 16*ai[k]; 1022 vj = aj + ai[k]; 1023 tp = t + k*4; 1024 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 1025 nz = ai[k+1] - ai[k]; 1026 1027 tp = t + (*vj)*4; 1028 while (nz--) { 1029 /* xk += U(k,:)*x(:) */ 1030 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 1031 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 1032 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 1033 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 1034 vj++; tp = t + (*vj)*4; 1035 v += 16; 1036 } 1037 tp = t + k*4; 1038 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 1039 idx = 4*r[k]; 1040 x[idx] = x0; 1041 x[idx+1] = x1; 1042 x[idx+2] = x2; 1043 x[idx+3] = x3; 1044 } 1045 1046 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1047 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1048 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1049 ierr = PetscLogFlops(16.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1050 PetscFunctionReturn(0); 1051 } 1052 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1055 PetscErrorCode ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1056 { 1057 MatScalar *v,*diag; 1058 PetscScalar *xp,x0,x1,x2,x3; 1059 PetscInt nz,*vj,k; 1060 1061 PetscFunctionBegin; 1062 for (k=0; k<mbs; k++){ 1063 v = aa + 16*ai[k]; 1064 xp = x + k*4; 1065 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 1066 nz = ai[k+1] - ai[k]; 1067 vj = aj + ai[k]; 1068 xp = x + (*vj)*4; 1069 while (nz--) { 1070 /* x(:) += U(k,:)^T*(Dk*xk) */ 1071 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 1072 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1073 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1074 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1075 vj++; xp = x + (*vj)*4; 1076 v += 16; 1077 } 1078 /* xk = inv(Dk)*(Dk*xk) */ 1079 diag = aa+k*16; /* ptr to inv(Dk) */ 1080 xp = x + k*4; 1081 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1082 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1083 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1084 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1085 } 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1091 PetscErrorCode BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1092 { 1093 MatScalar *v; 1094 PetscScalar *xp,x0,x1,x2,x3; 1095 PetscInt nz,*vj,k; 1096 1097 PetscFunctionBegin; 1098 for (k=mbs-1; k>=0; k--){ 1099 v = aa + 16*ai[k]; 1100 xp = x + k*4; 1101 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 1102 nz = ai[k+1] - ai[k]; 1103 vj = aj + ai[k]; 1104 xp = x + (*vj)*4; 1105 while (nz--) { 1106 /* xk += U(k,:)*x(:) */ 1107 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 1108 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 1109 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 1110 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 1111 vj++; 1112 v += 16; xp = x + (*vj)*4; 1113 } 1114 xp = x + k*4; 1115 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 1116 } 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering" 1122 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1123 { 1124 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1125 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1126 MatScalar *aa=a->a; 1127 PetscScalar *x,*b; 1128 PetscErrorCode ierr; 1129 1130 PetscFunctionBegin; 1131 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1132 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1133 1134 /* solve U^T * D * y = b by forward substitution */ 1135 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1136 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1137 1138 /* solve U*x = y by back substitution */ 1139 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1140 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1141 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1142 ierr = PetscLogFlops(16.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering" 1148 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1149 { 1150 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1151 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1152 MatScalar *aa=a->a; 1153 PetscScalar *x,*b; 1154 PetscErrorCode ierr; 1155 1156 PetscFunctionBegin; 1157 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1158 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1159 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1160 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1161 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1162 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1163 ierr = PetscLogFlops(16.0*a->nz + mbs);CHKERRQ(ierr); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering" 1169 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1170 { 1171 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1172 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1173 MatScalar *aa=a->a; 1174 PetscScalar *x,*b; 1175 PetscErrorCode ierr; 1176 1177 PetscFunctionBegin; 1178 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1179 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1180 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1181 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1182 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1183 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1184 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatSolve_SeqSBAIJ_3" 1190 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 1191 { 1192 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1193 IS isrow=a->row; 1194 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1195 PetscErrorCode ierr; 1196 const PetscInt *r; 1197 PetscInt nz,*vj,k,idx; 1198 MatScalar *aa=a->a,*v,*diag; 1199 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 1200 1201 PetscFunctionBegin; 1202 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1203 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1204 t = a->solve_work; 1205 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1206 1207 /* solve U^T * D * y = b by forward substitution */ 1208 tp = t; 1209 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1210 idx = 3*r[k]; 1211 tp[0] = b[idx]; 1212 tp[1] = b[idx+1]; 1213 tp[2] = b[idx+2]; 1214 tp += 3; 1215 } 1216 1217 for (k=0; k<mbs; k++){ 1218 v = aa + 9*ai[k]; 1219 vj = aj + ai[k]; 1220 tp = t + k*3; 1221 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 1222 nz = ai[k+1] - ai[k]; 1223 1224 tp = t + (*vj)*3; 1225 while (nz--) { 1226 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1227 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1228 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1229 vj++; tp = t + (*vj)*3; 1230 v += 9; 1231 } 1232 1233 /* xk = inv(Dk)*(Dk*xk) */ 1234 diag = aa+k*9; /* ptr to inv(Dk) */ 1235 tp = t + k*3; 1236 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1237 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1238 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1239 } 1240 1241 /* solve U*x = y by back substitution */ 1242 for (k=mbs-1; k>=0; k--){ 1243 v = aa + 9*ai[k]; 1244 vj = aj + ai[k]; 1245 tp = t + k*3; 1246 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 1247 nz = ai[k+1] - ai[k]; 1248 1249 tp = t + (*vj)*3; 1250 while (nz--) { 1251 /* xk += U(k,:)*x(:) */ 1252 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 1253 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 1254 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 1255 vj++; tp = t + (*vj)*3; 1256 v += 9; 1257 } 1258 tp = t + k*3; 1259 tp[0] = x0; tp[1] = x1; tp[2] = x2; 1260 idx = 3*r[k]; 1261 x[idx] = x0; 1262 x[idx+1] = x1; 1263 x[idx+2] = x2; 1264 } 1265 1266 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1267 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1268 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1269 ierr = PetscLogFlops(9.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1270 PetscFunctionReturn(0); 1271 } 1272 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1275 PetscErrorCode ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1276 { 1277 MatScalar *v,*diag; 1278 PetscScalar *xp,x0,x1,x2; 1279 PetscInt nz,*vj,k; 1280 1281 PetscFunctionBegin; 1282 for (k=0; k<mbs; k++){ 1283 v = aa + 9*ai[k]; 1284 xp = x + k*3; 1285 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 1286 nz = ai[k+1] - ai[k]; 1287 vj = aj + ai[k]; 1288 xp = x + (*vj)*3; 1289 while (nz--) { 1290 /* x(:) += U(k,:)^T*(Dk*xk) */ 1291 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1292 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1293 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1294 vj++; xp = x + (*vj)*3; 1295 v += 9; 1296 } 1297 /* xk = inv(Dk)*(Dk*xk) */ 1298 diag = aa+k*9; /* ptr to inv(Dk) */ 1299 xp = x + k*3; 1300 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1301 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1302 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1303 } 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1309 PetscErrorCode BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1310 { 1311 MatScalar *v; 1312 PetscScalar *xp,x0,x1,x2; 1313 PetscInt nz,*vj,k; 1314 1315 PetscFunctionBegin; 1316 for (k=mbs-1; k>=0; k--){ 1317 v = aa + 9*ai[k]; 1318 xp = x + k*3; 1319 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 1320 nz = ai[k+1] - ai[k]; 1321 vj = aj + ai[k]; 1322 xp = x + (*vj)*3; 1323 while (nz--) { 1324 /* xk += U(k,:)*x(:) */ 1325 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 1326 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 1327 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 1328 vj++; 1329 v += 9; xp = x + (*vj)*3; 1330 } 1331 xp = x + k*3; 1332 xp[0] = x0; xp[1] = x1; xp[2] = x2; 1333 } 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering" 1339 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1340 { 1341 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1342 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1343 MatScalar *aa=a->a; 1344 PetscScalar *x,*b; 1345 PetscErrorCode ierr; 1346 1347 PetscFunctionBegin; 1348 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1349 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1350 1351 /* solve U^T * D * y = b by forward substitution */ 1352 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1353 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1354 1355 /* solve U*x = y by back substitution */ 1356 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1357 1358 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1359 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1360 ierr = PetscLogFlops(9.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering" 1366 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1367 { 1368 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1369 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1370 MatScalar *aa=a->a; 1371 PetscScalar *x,*b; 1372 PetscErrorCode ierr; 1373 1374 PetscFunctionBegin; 1375 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1376 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1377 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1378 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1379 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1380 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1381 ierr = PetscLogFlops(9.0*(a->nz + mbs));CHKERRQ(ierr); 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering" 1387 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1388 { 1389 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1390 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1391 MatScalar *aa=a->a; 1392 PetscScalar *x,*b; 1393 PetscErrorCode ierr; 1394 1395 PetscFunctionBegin; 1396 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1397 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1398 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1399 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1400 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1401 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1402 ierr = PetscLogFlops(9.0*a->nz);CHKERRQ(ierr); 1403 PetscFunctionReturn(0); 1404 } 1405 1406 #undef __FUNCT__ 1407 #define __FUNCT__ "MatSolve_SeqSBAIJ_2" 1408 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 1409 { 1410 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 1411 IS isrow=a->row; 1412 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1413 PetscErrorCode ierr; 1414 const PetscInt *r; 1415 PetscInt nz,*vj,k,k2,idx; 1416 MatScalar *aa=a->a,*v,*diag; 1417 PetscScalar *x,*b,x0,x1,*t; 1418 1419 PetscFunctionBegin; 1420 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1421 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1422 t = a->solve_work; 1423 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1424 1425 /* solve U^T * D * y = perm(b) by forward substitution */ 1426 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1427 idx = 2*r[k]; 1428 t[k*2] = b[idx]; 1429 t[k*2+1] = b[idx+1]; 1430 } 1431 for (k=0; k<mbs; k++){ 1432 v = aa + 4*ai[k]; 1433 vj = aj + ai[k]; 1434 k2 = k*2; 1435 x0 = t[k2]; x1 = t[k2+1]; 1436 nz = ai[k+1] - ai[k]; 1437 while (nz--) { 1438 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1439 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1440 vj++; v += 4; 1441 } 1442 diag = aa+k*4; /* ptr to inv(Dk) */ 1443 t[k2] = diag[0]*x0 + diag[2]*x1; 1444 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1445 } 1446 1447 /* solve U*x = y by back substitution */ 1448 for (k=mbs-1; k>=0; k--){ 1449 v = aa + 4*ai[k]; 1450 vj = aj + ai[k]; 1451 k2 = k*2; 1452 x0 = t[k2]; x1 = t[k2+1]; 1453 nz = ai[k+1] - ai[k]; 1454 while (nz--) { 1455 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1456 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1457 vj++; v += 4; 1458 } 1459 t[k2] = x0; 1460 t[k2+1] = x1; 1461 idx = 2*r[k]; 1462 x[idx] = x0; 1463 x[idx+1] = x1; 1464 } 1465 1466 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1467 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1468 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1469 ierr = PetscLogFlops(4.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1475 PetscErrorCode ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1476 { 1477 MatScalar *v,*diag; 1478 PetscScalar x0,x1; 1479 PetscInt nz,*vj,k,k2; 1480 1481 PetscFunctionBegin; 1482 for (k=0; k<mbs; k++){ 1483 v = aa + 4*ai[k]; 1484 vj = aj + ai[k]; 1485 k2 = k*2; 1486 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1487 nz = ai[k+1] - ai[k]; 1488 1489 while (nz--) { 1490 /* x(:) += U(k,:)^T*(Dk*xk) */ 1491 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1492 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1493 vj++; v += 4; 1494 } 1495 /* xk = inv(Dk)*(Dk*xk) */ 1496 diag = aa+k*4; /* ptr to inv(Dk) */ 1497 x[k2] = diag[0]*x0 + diag[2]*x1; 1498 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1499 } 1500 PetscFunctionReturn(0); 1501 } 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1505 PetscErrorCode BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1506 { 1507 MatScalar *v; 1508 PetscScalar x0,x1; 1509 PetscInt nz,*vj,k,k2; 1510 1511 PetscFunctionBegin; 1512 for (k=mbs-1; k>=0; k--){ 1513 v = aa + 4*ai[k]; 1514 vj = aj + ai[k]; 1515 k2 = k*2; 1516 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1517 nz = ai[k+1] - ai[k]; 1518 while (nz--) { 1519 /* xk += U(k,:)*x(:) */ 1520 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1521 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1522 vj++; v += 4; 1523 } 1524 x[k2] = x0; 1525 x[k2+1] = x1; 1526 } 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering" 1532 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1533 { 1534 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1535 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1536 MatScalar *aa=a->a; 1537 PetscScalar *x,*b; 1538 PetscErrorCode ierr; 1539 1540 PetscFunctionBegin; 1541 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1542 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1543 1544 /* solve U^T * D * y = b by forward substitution */ 1545 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1546 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1547 1548 /* solve U*x = y by back substitution */ 1549 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1550 1551 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1552 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1553 ierr = PetscLogFlops(4.0*(2.0*a->nz + mbs));CHKERRQ(ierr); /* bs2*(2*a->nz + mbs) */ 1554 PetscFunctionReturn(0); 1555 } 1556 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering" 1559 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1560 { 1561 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1562 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1563 MatScalar *aa=a->a; 1564 PetscScalar *x,*b; 1565 PetscErrorCode ierr; 1566 1567 PetscFunctionBegin; 1568 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1569 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1570 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1571 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1572 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1573 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1574 ierr = PetscLogFlops(4.0*(a->nz + mbs));CHKERRQ(ierr); 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering" 1580 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1581 { 1582 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1583 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1584 MatScalar *aa=a->a; 1585 PetscScalar *x,*b; 1586 PetscErrorCode ierr; 1587 1588 PetscFunctionBegin; 1589 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1590 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1591 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1592 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1593 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1594 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1595 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 1596 PetscFunctionReturn(0); 1597 } 1598 1599 #undef __FUNCT__ 1600 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1601 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1602 { 1603 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1604 IS isrow=a->row; 1605 PetscErrorCode ierr; 1606 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1607 const MatScalar *aa=a->a,*v; 1608 PetscScalar *x,*b,xk,*t; 1609 PetscInt nz,k; 1610 1611 PetscFunctionBegin; 1612 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1613 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1614 t = a->solve_work; 1615 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1616 1617 /* solve U^T*D^(1/2)*y = perm(b) by forward substitution */ 1618 for (k=0; k<mbs; k++) t[k] = b[rp[k]]; 1619 for (k=0; k<mbs; k++){ 1620 v = aa + ai[k] + 1; 1621 vj = aj + ai[k] + 1; 1622 xk = t[k]; 1623 nz = ai[k+1] - ai[k] - 1; 1624 while (nz--) t[*vj++] += (*v++) * xk; 1625 t[k] = xk*aa[ai[k]]; /* aa[k] = 1/D(k) */ 1626 } 1627 1628 /* solve U*perm(x) = y by back substitution */ 1629 for (k=mbs-1; k>=0; k--){ 1630 v = aa + ai[k] + 1; 1631 vj = aj + ai[k] + 1; 1632 nz = ai[k+1] - ai[k] - 1; 1633 while (nz--) t[k] += (*v++) * t[*vj++]; 1634 x[rp[k]] = t[k]; 1635 } 1636 1637 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1638 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1639 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1640 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 1641 PetscFunctionReturn(0); 1642 } 1643 1644 #undef __FUNCT__ 1645 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1" 1646 PetscErrorCode MatForwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1647 { 1648 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1649 IS isrow=a->row; 1650 PetscErrorCode ierr; 1651 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1652 const MatScalar *aa=a->a,*v; 1653 PetscReal diagk; 1654 PetscScalar *x,*b,xk; 1655 PetscInt nz,k; 1656 1657 PetscFunctionBegin; 1658 /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */ 1659 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1660 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1661 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1662 1663 for (k=0; k<mbs; k++) x[k] = b[rp[k]]; 1664 for (k=0; k<mbs; k++){ 1665 v = aa + ai[k] + 1; 1666 vj = aj + ai[k] + 1; 1667 xk = x[k]; 1668 nz = ai[k+1] - ai[k] - 1; 1669 while (nz--) x[*vj++] += (*v++) * xk; 1670 1671 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1672 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1673 x[k] = xk*sqrt(diagk); 1674 } 1675 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1676 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1677 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1678 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1" 1684 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1685 { 1686 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1687 IS isrow=a->row; 1688 PetscErrorCode ierr; 1689 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1690 const MatScalar *aa=a->a,*v; 1691 PetscReal diagk; 1692 PetscScalar *x,*b,*t; 1693 PetscInt nz,k; 1694 1695 PetscFunctionBegin; 1696 /* solve D^(1/2)*U*perm(x) = b by back substitution */ 1697 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1698 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1699 t = a->solve_work; 1700 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1701 1702 for (k=mbs-1; k>=0; k--){ 1703 v = aa + ai[k] + 1; 1704 vj = aj + ai[k] + 1; 1705 diagk = PetscRealPart(aa[ai[k]]); 1706 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1707 t[k] = b[k] * sqrt(diagk); 1708 nz = ai[k+1] - ai[k] - 1; 1709 while (nz--) t[k] += (*v++) * t[*vj++]; 1710 x[rp[k]] = t[k]; 1711 } 1712 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1713 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1714 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1715 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1716 PetscFunctionReturn(0); 1717 } 1718 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1721 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1722 { 1723 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1724 PetscErrorCode ierr; 1725 1726 PetscFunctionBegin; 1727 if (A->rmap->bs == 1) { 1728 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1729 } else { 1730 IS isrow=a->row; 1731 const PetscInt *vj,mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; 1732 const MatScalar *aa=a->a,*v; 1733 PetscScalar *x,*b,*t; 1734 PetscInt nz,k,n,i; 1735 if (bb->n > a->solves_work_n) { 1736 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 1737 ierr = PetscMalloc(bb->n*A->rmap->N*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1738 a->solves_work_n = bb->n; 1739 } 1740 n = bb->n; 1741 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1742 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1743 t = a->solves_work; 1744 1745 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1746 1747 /* solve U^T*D*y = perm(b) by forward substitution */ 1748 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs];} /* values are stored interlaced in t */ 1749 for (k=0; k<mbs; k++){ 1750 v = aa + ai[k]; 1751 vj = aj + ai[k]; 1752 nz = ai[k+1] - ai[k]; 1753 while (nz--) { 1754 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1755 v++;vj++; 1756 } 1757 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1758 } 1759 1760 /* solve U*perm(x) = y by back substitution */ 1761 for (k=mbs-1; k>=0; k--){ 1762 v = aa + ai[k]; 1763 vj = aj + ai[k]; 1764 nz = ai[k+1] - ai[k]; 1765 while (nz--) { 1766 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1767 v++;vj++; 1768 } 1769 for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i]; 1770 } 1771 1772 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1773 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1774 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1775 ierr = PetscLogFlops(bb->n*(4.0*a->nz + A->rmap->N));CHKERRQ(ierr); 1776 } 1777 PetscFunctionReturn(0); 1778 } 1779 1780 /* 1781 Special case where the matrix was ILU(0) factored in the natural 1782 ordering. This eliminates the need for the column and row permutation. 1783 */ 1784 #undef __FUNCT__ 1785 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1786 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1787 { 1788 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1789 PetscErrorCode ierr; 1790 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1791 MatScalar *aa=a->a,*v; 1792 PetscScalar *x,*b,xk; 1793 PetscInt nz,*vj,k; 1794 1795 PetscFunctionBegin; 1796 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1797 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1798 1799 /* solve U^T*D*y = b by forward substitution */ 1800 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1801 for (k=0; k<mbs; k++){ 1802 v = aa + ai[k] + 1; 1803 vj = aj + ai[k] + 1; 1804 xk = x[k]; 1805 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1806 while (nz--) x[*vj++] += (*v++) * xk; 1807 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1808 } 1809 1810 /* solve U*x = y by back substitution */ 1811 for (k=mbs-2; k>=0; k--){ 1812 v = aa + ai[k] + 1; 1813 vj = aj + ai[k] + 1; 1814 xk = x[k]; 1815 nz = ai[k+1] - ai[k] - 1; 1816 while (nz--) xk += (*v++) * x[*vj++]; 1817 x[k] = xk; 1818 } 1819 1820 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1821 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1822 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 1823 PetscFunctionReturn(0); 1824 } 1825 1826 #undef __FUNCT__ 1827 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering" 1828 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1829 { 1830 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1831 PetscErrorCode ierr; 1832 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1833 MatScalar *aa=a->a,*v; 1834 PetscReal diagk; 1835 PetscScalar *x,*b; 1836 PetscInt nz,*vj,k; 1837 1838 PetscFunctionBegin; 1839 /* solve U^T*D^(1/2)*x = b by forward substitution */ 1840 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1841 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1842 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1843 for (k=0; k<mbs; k++){ 1844 v = aa + ai[k] + 1; 1845 vj = aj + ai[k] + 1; 1846 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1847 while (nz--) x[*vj++] += (*v++) * x[k]; 1848 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1849 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ2(PETSC_ERR_SUP,"Diagonal (%g,%g) must be real and nonnegative",PetscRealPart(aa[ai[k]]),PetscImaginaryPart(aa[ai[k]])); 1850 x[k] *= sqrt(diagk); 1851 } 1852 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1853 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1854 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1855 PetscFunctionReturn(0); 1856 } 1857 1858 #undef __FUNCT__ 1859 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering" 1860 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1861 { 1862 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1863 PetscErrorCode ierr; 1864 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1865 MatScalar *aa=a->a,*v; 1866 PetscReal diagk; 1867 PetscScalar *x,*b; 1868 PetscInt nz,*vj,k; 1869 1870 PetscFunctionBegin; 1871 /* solve D^(1/2)*U*x = b by back substitution */ 1872 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1873 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1874 1875 for (k=mbs-1; k>=0; k--){ 1876 v = aa + ai[k] + 1; 1877 vj = aj + ai[k] + 1; 1878 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1879 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1880 x[k] = sqrt(diagk)*b[k]; 1881 nz = ai[k+1] - ai[k] - 1; 1882 while (nz--) x[k] += (*v++) * x[*vj++]; 1883 } 1884 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1885 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1886 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1887 PetscFunctionReturn(0); 1888 } 1889 1890 extern PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat,PetscTruth); 1891 1892 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR" 1895 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat B,Mat A,IS perm,const MatFactorInfo *info) 1896 { 1897 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 1898 PetscErrorCode ierr; 1899 const PetscInt *rip,mbs = a->mbs,*ai = a->i,*aj = a->j; 1900 PetscInt *jutmp,bs = A->rmap->bs,bs2=a->bs2,i; 1901 PetscInt m,reallocs = 0,*levtmp; 1902 PetscInt *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 1903 PetscInt incrlev,*lev,shift,prow,nz; 1904 PetscReal f = info->fill,levels = info->levels; 1905 PetscTruth perm_identity; 1906 1907 PetscFunctionBegin; 1908 /* check whether perm is the identity mapping */ 1909 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1910 1911 if (perm_identity){ 1912 a->permute = PETSC_FALSE; 1913 ai = a->i; aj = a->j; 1914 } else { /* non-trivial permutation */ 1915 a->permute = PETSC_TRUE; 1916 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 1917 ai = a->inew; aj = a->jnew; 1918 } 1919 1920 /* initialization */ 1921 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1922 umax = (PetscInt)(f*ai[mbs] + 1); 1923 ierr = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr); 1924 umax += mbs + 1; 1925 shift = mbs + 1; 1926 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 1927 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 1928 iu[0] = mbs + 1; 1929 juidx = mbs + 1; 1930 /* prowl: linked list for pivot row */ 1931 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr); 1932 /* q: linked list for col index */ 1933 q = prowl + mbs; 1934 levtmp = q + mbs; 1935 1936 for (i=0; i<mbs; i++){ 1937 prowl[i] = mbs; 1938 q[i] = 0; 1939 } 1940 1941 /* for each row k */ 1942 for (k=0; k<mbs; k++){ 1943 nzk = 0; 1944 q[k] = mbs; 1945 /* copy current row into linked list */ 1946 nz = ai[rip[k]+1] - ai[rip[k]]; 1947 j = ai[rip[k]]; 1948 while (nz--){ 1949 vj = rip[aj[j++]]; 1950 if (vj > k){ 1951 qm = k; 1952 do { 1953 m = qm; qm = q[m]; 1954 } while(qm < vj); 1955 if (qm == vj) { 1956 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 1957 } 1958 nzk++; 1959 q[m] = vj; 1960 q[vj] = qm; 1961 levtmp[vj] = 0; /* initialize lev for nonzero element */ 1962 } 1963 } 1964 1965 /* modify nonzero structure of k-th row by computing fill-in 1966 for each row prow to be merged in */ 1967 prow = k; 1968 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 1969 1970 while (prow < k){ 1971 /* merge row prow into k-th row */ 1972 jmin = iu[prow] + 1; 1973 jmax = iu[prow+1]; 1974 qm = k; 1975 for (j=jmin; j<jmax; j++){ 1976 incrlev = lev[j-shift] + 1; 1977 if (incrlev > levels) continue; 1978 vj = ju[j]; 1979 do { 1980 m = qm; qm = q[m]; 1981 } while (qm < vj); 1982 if (qm != vj){ /* a fill */ 1983 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 1984 levtmp[vj] = incrlev; 1985 } else { 1986 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 1987 } 1988 } 1989 prow = prowl[prow]; /* next pivot row */ 1990 } 1991 1992 /* add k to row list for first nonzero element in k-th row */ 1993 if (nzk > 1){ 1994 i = q[k]; /* col value of first nonzero element in k_th row of U */ 1995 prowl[k] = prowl[i]; prowl[i] = k; 1996 } 1997 iu[k+1] = iu[k] + nzk; 1998 1999 /* allocate more space to ju and lev if needed */ 2000 if (iu[k+1] > umax) { 2001 /* estimate how much additional space we will need */ 2002 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 2003 /* just double the memory each time */ 2004 maxadd = umax; 2005 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 2006 umax += maxadd; 2007 2008 /* allocate a longer ju */ 2009 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 2010 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 2011 ierr = PetscFree(ju);CHKERRQ(ierr); 2012 ju = jutmp; 2013 2014 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 2015 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr); 2016 ierr = PetscFree(lev);CHKERRQ(ierr); 2017 lev = jutmp; 2018 reallocs += 2; /* count how many times we realloc */ 2019 } 2020 2021 /* save nonzero structure of k-th row in ju */ 2022 i=k; 2023 while (nzk --) { 2024 i = q[i]; 2025 ju[juidx] = i; 2026 lev[juidx-shift] = levtmp[i]; 2027 juidx++; 2028 } 2029 } 2030 2031 #if defined(PETSC_USE_INFO) 2032 if (ai[mbs] != 0) { 2033 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2034 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2035 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2036 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 2037 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2038 } else { 2039 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2040 } 2041 #endif 2042 2043 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2044 ierr = PetscFree(prowl);CHKERRQ(ierr); 2045 ierr = PetscFree(lev);CHKERRQ(ierr); 2046 2047 /* put together the new matrix */ 2048 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,PETSC_NULL);CHKERRQ(ierr); 2049 2050 /* ierr = PetscLogObjectParent(B,iperm);CHKERRQ(ierr); */ 2051 b = (Mat_SeqSBAIJ*)(B)->data; 2052 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2053 b->singlemalloc = PETSC_FALSE; 2054 b->free_a = PETSC_TRUE; 2055 b->free_ij = PETSC_TRUE; 2056 /* the next line frees the default space generated by the Create() */ 2057 ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr); 2058 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 2059 b->j = ju; 2060 b->i = iu; 2061 b->diag = 0; 2062 b->ilen = 0; 2063 b->imax = 0; 2064 2065 if (b->row) { 2066 ierr = ISDestroy(b->row);CHKERRQ(ierr); 2067 } 2068 if (b->icol) { 2069 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 2070 } 2071 b->row = perm; 2072 b->icol = perm; 2073 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2074 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2075 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2076 /* In b structure: Free imax, ilen, old a, old j. 2077 Allocate idnew, solve_work, new a, new j */ 2078 ierr = PetscLogObjectMemory(B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2079 b->maxnz = b->nz = iu[mbs]; 2080 2081 (B)->info.factor_mallocs = reallocs; 2082 (B)->info.fill_ratio_given = f; 2083 if (ai[mbs] != 0) { 2084 (B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2085 } else { 2086 (B)->info.fill_ratio_needed = 0.0; 2087 } 2088 ierr = MatSeqSBAIJSetNumericFactorization(B,perm_identity);CHKERRQ(ierr); 2089 PetscFunctionReturn(0); 2090 } 2091 2092 #include "petscbt.h" 2093 #include "../src/mat/utils/freespace.h" 2094 #undef __FUNCT__ 2095 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 2096 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2097 { 2098 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2099 Mat_SeqSBAIJ *b; 2100 PetscErrorCode ierr; 2101 PetscTruth perm_identity,free_ij = PETSC_TRUE,missing; 2102 PetscInt bs=A->rmap->bs,am=a->mbs,d; 2103 const PetscInt *cols,*rip,*ai,*aj; 2104 PetscInt reallocs=0,i,*ui; 2105 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2106 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 2107 PetscReal fill=info->fill,levels=info->levels,ratio_needed; 2108 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2109 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2110 PetscBT lnkbt; 2111 2112 PetscFunctionBegin; 2113 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2114 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2115 2116 /* 2117 This code originally uses Modified Sparse Row (MSR) storage 2118 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice! 2119 Then it is rewritten so the factor B takes seqsbaij format. However the associated 2120 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1, 2121 thus the original code in MSR format is still used for these cases. 2122 The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever 2123 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 2124 */ 2125 if (bs > 1){ 2126 ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info);CHKERRQ(ierr); 2127 PetscFunctionReturn(0); 2128 } 2129 2130 /* check whether perm is the identity mapping */ 2131 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2132 2133 /* special case that simply copies fill pattern */ 2134 if (!levels && perm_identity) { 2135 a->permute = PETSC_FALSE; 2136 /* reuse the column pointers and row offsets for memory savings */ 2137 ui = a->i; 2138 uj = a->j; 2139 free_ij = PETSC_FALSE; 2140 ratio_needed = 1.0; 2141 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2142 if (perm_identity){ 2143 a->permute = PETSC_FALSE; 2144 ai = a->i; aj = a->j; 2145 } else { /* non-trivial permutation */ 2146 a->permute = PETSC_TRUE; 2147 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 2148 ai = a->inew; aj = a->jnew; 2149 } 2150 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2151 2152 /* initialization */ 2153 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2154 ui[0] = 0; 2155 2156 /* jl: linked list for storing indices of the pivot rows 2157 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2158 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 2159 il = jl + am; 2160 uj_ptr = (PetscInt**)(il + am); 2161 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 2162 for (i=0; i<am; i++){ 2163 jl[i] = am; il[i] = 0; 2164 } 2165 2166 /* create and initialize a linked list for storing column indices of the active row k */ 2167 nlnk = am + 1; 2168 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2169 2170 /* initial FreeSpace size is fill*(ai[am]+1) */ 2171 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2172 current_space = free_space; 2173 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2174 current_space_lvl = free_space_lvl; 2175 2176 for (k=0; k<am; k++){ /* for each active row k */ 2177 /* initialize lnk by the column indices of row rip[k] */ 2178 nzk = 0; 2179 ncols = ai[rip[k]+1] - ai[rip[k]]; 2180 cols = aj+ai[rip[k]]; 2181 ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2182 nzk += nlnk; 2183 2184 /* update lnk by computing fill-in for each pivot row to be merged in */ 2185 prow = jl[k]; /* 1st pivot row */ 2186 2187 while (prow < k){ 2188 nextprow = jl[prow]; 2189 2190 /* merge prow into k-th row */ 2191 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2192 jmax = ui[prow+1]; 2193 ncols = jmax-jmin; 2194 i = jmin - ui[prow]; 2195 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2196 j = *(uj_lvl_ptr[prow] + i - 1); 2197 cols_lvl = uj_lvl_ptr[prow]+i; 2198 ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2199 nzk += nlnk; 2200 2201 /* update il and jl for prow */ 2202 if (jmin < jmax){ 2203 il[prow] = jmin; 2204 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2205 } 2206 prow = nextprow; 2207 } 2208 2209 /* if free space is not available, make more free space */ 2210 if (current_space->local_remaining<nzk) { 2211 i = am - k + 1; /* num of unfactored rows */ 2212 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2213 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2214 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2215 reallocs++; 2216 } 2217 2218 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2219 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2220 2221 /* add the k-th row into il and jl */ 2222 if (nzk-1 > 0){ 2223 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2224 jl[k] = jl[i]; jl[i] = k; 2225 il[k] = ui[k] + 1; 2226 } 2227 uj_ptr[k] = current_space->array; 2228 uj_lvl_ptr[k] = current_space_lvl->array; 2229 2230 current_space->array += nzk; 2231 current_space->local_used += nzk; 2232 current_space->local_remaining -= nzk; 2233 current_space_lvl->array += nzk; 2234 current_space_lvl->local_used += nzk; 2235 current_space_lvl->local_remaining -= nzk; 2236 2237 ui[k+1] = ui[k] + nzk; 2238 } 2239 2240 #if defined(PETSC_USE_INFO) 2241 if (ai[am] != 0) { 2242 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2243 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2244 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2245 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2246 } else { 2247 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2248 } 2249 #endif 2250 2251 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2252 ierr = PetscFree(jl);CHKERRQ(ierr); 2253 2254 /* destroy list of free space and other temporary array(s) */ 2255 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2256 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2257 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2258 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2259 if (ai[am] != 0) { 2260 ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2261 } else { 2262 ratio_needed = 0.0; 2263 } 2264 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2265 2266 /* put together the new matrix in MATSEQSBAIJ format */ 2267 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2268 2269 b = (Mat_SeqSBAIJ*)(fact)->data; 2270 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2271 b->singlemalloc = PETSC_FALSE; 2272 b->free_a = PETSC_TRUE; 2273 b->free_ij = free_ij; 2274 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2275 b->j = uj; 2276 b->i = ui; 2277 b->diag = 0; 2278 b->ilen = 0; 2279 b->imax = 0; 2280 b->row = perm; 2281 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2282 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2283 b->icol = perm; 2284 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2285 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2286 b->maxnz = b->nz = ui[am]; 2287 2288 (fact)->info.factor_mallocs = reallocs; 2289 (fact)->info.fill_ratio_given = fill; 2290 (fact)->info.fill_ratio_needed = ratio_needed; 2291 ierr = MatSeqSBAIJSetNumericFactorization(fact,perm_identity);CHKERRQ(ierr); 2292 PetscFunctionReturn(0); 2293 } 2294 2295