1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for SBAIJ format. 5 */ 6 7 #include "../src/mat/impls/sbaij/seq/sbaij.h" 8 #include "../src/mat/impls/baij/seq/baij.h" 9 #include "../src/mat/blockinvert.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSolve_SeqSBAIJ_N" 13 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 14 { 15 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 16 IS isrow=a->row; 17 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 18 PetscErrorCode ierr; 19 const PetscInt *r; 20 PetscInt nz,*vj,k,idx,k1; 21 PetscInt bs=A->rmap->bs,bs2 = a->bs2; 22 MatScalar *aa=a->a,*v,*diag; 23 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 24 25 PetscFunctionBegin; 26 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 27 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 28 t = a->solve_work; 29 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 30 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 31 32 /* solve U^T * D * y = b by forward substitution */ 33 xk = t; 34 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 35 idx = bs*r[k]; 36 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 37 } 38 for (k=0; k<mbs; k++){ 39 v = aa + bs2*ai[k]; 40 xk = t + k*bs; /* Dk*xk = k-th block of x */ 41 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 42 nz = ai[k+1] - ai[k]; 43 vj = aj + ai[k]; 44 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 45 while (nz--) { 46 /* x(:) += U(k,:)^T*(Dk*xk) */ 47 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 48 vj++; xj = t + (*vj)*bs; 49 v += bs2; 50 } 51 /* xk = inv(Dk)*(Dk*xk) */ 52 diag = aa+k*bs2; /* ptr to inv(Dk) */ 53 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 54 } 55 56 /* solve U*x = y by back substitution */ 57 for (k=mbs-1; k>=0; k--){ 58 v = aa + bs2*ai[k]; 59 xk = t + k*bs; /* xk */ 60 nz = ai[k+1] - ai[k]; 61 vj = aj + ai[k]; 62 xj = t + (*vj)*bs; 63 while (nz--) { 64 /* xk += U(k,:)*x(:) */ 65 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 66 vj++; 67 v += bs2; xj = t + (*vj)*bs; 68 } 69 idx = bs*r[k]; 70 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 71 } 72 73 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 74 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 75 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 76 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 77 ierr = PetscLogFlops(bs2*(2.0*a->nz + mbs));CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N" 83 PetscErrorCode MatForwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 84 { 85 PetscFunctionBegin; 86 SETERRQ(1,"not implemented yet"); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N" 92 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 93 { 94 PetscFunctionBegin; 95 SETERRQ(1,"not implemented yet"); 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 101 PetscErrorCode ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 102 { 103 PetscErrorCode ierr; 104 PetscInt nz,*vj,k; 105 PetscInt bs2 = bs*bs; 106 MatScalar *v,*diag; 107 PetscScalar *xk,*xj,*xk_tmp; 108 109 PetscFunctionBegin; 110 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 111 for (k=0; k<mbs; k++){ 112 v = aa + bs2*ai[k]; 113 xk = x + k*bs; /* Dk*xk = k-th block of x */ 114 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 115 nz = ai[k+1] - ai[k]; 116 vj = aj + ai[k]; 117 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 118 while (nz--) { 119 /* x(:) += U(k,:)^T*(Dk*xk) */ 120 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 121 vj++; xj = x + (*vj)*bs; 122 v += bs2; 123 } 124 /* xk = inv(Dk)*(Dk*xk) */ 125 diag = aa+k*bs2; /* ptr to inv(Dk) */ 126 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 127 } 128 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 134 PetscErrorCode BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 135 { 136 PetscInt nz,*vj,k; 137 PetscInt bs2 = bs*bs; 138 MatScalar *v; 139 PetscScalar *xk,*xj; 140 141 PetscFunctionBegin; 142 for (k=mbs-1; k>=0; k--){ 143 v = aa + bs2*ai[k]; 144 xk = x + k*bs; /* xk */ 145 nz = ai[k+1] - ai[k]; 146 vj = aj + ai[k]; 147 xj = x + (*vj)*bs; 148 while (nz--) { 149 /* xk += U(k,:)*x(:) */ 150 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 151 vj++; 152 v += bs2; xj = x + (*vj)*bs; 153 } 154 } 155 PetscFunctionReturn(0); 156 } 157 158 #undef __FUNCT__ 159 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering" 160 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 161 { 162 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 163 PetscErrorCode ierr; 164 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 165 PetscInt bs=A->rmap->bs; 166 MatScalar *aa=a->a; 167 PetscScalar *x,*b; 168 #if defined(PETSC_USE_LOG) 169 PetscInt bs2 = a->bs2; 170 #endif 171 172 PetscFunctionBegin; 173 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 175 176 /* solve U^T * D * y = b by forward substitution */ 177 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 178 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 179 180 /* solve U*x = y by back substitution */ 181 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 182 183 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 184 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 185 ierr = PetscLogFlops(bs2*(2.0*a->nz + mbs));CHKERRQ(ierr); 186 PetscFunctionReturn(0); 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering" 191 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 192 { 193 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 194 PetscErrorCode ierr; 195 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 196 PetscInt bs=A->rmap->bs; 197 MatScalar *aa=a->a; 198 PetscScalar *x,*b; 199 #if defined(PETSC_USE_LOG) 200 PetscInt bs2 = a->bs2; 201 #endif 202 203 PetscFunctionBegin; 204 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 205 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 206 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 207 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 208 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 209 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 210 ierr = PetscLogFlops(bs2*a->nz + A->rmap->N);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering" 216 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 217 { 218 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 219 PetscErrorCode ierr; 220 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 221 PetscInt bs=A->rmap->bs; 222 MatScalar *aa=a->a; 223 PetscScalar *x,*b; 224 #if defined(PETSC_USE_LOG) 225 PetscInt bs2 = a->bs2; 226 #endif 227 228 PetscFunctionBegin; 229 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 230 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 231 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 232 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 233 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 234 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 235 ierr = PetscLogFlops(bs2*a->nz);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatSolve_SeqSBAIJ_7" 241 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 242 { 243 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 244 IS isrow=a->row; 245 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 246 PetscErrorCode ierr; 247 const PetscInt *r; 248 PetscInt nz,*vj,k,idx; 249 MatScalar *aa=a->a,*v,*d; 250 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 251 252 PetscFunctionBegin; 253 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 254 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 255 t = a->solve_work; 256 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 257 258 /* solve U^T * D * y = b by forward substitution */ 259 tp = t; 260 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 261 idx = 7*r[k]; 262 tp[0] = b[idx]; 263 tp[1] = b[idx+1]; 264 tp[2] = b[idx+2]; 265 tp[3] = b[idx+3]; 266 tp[4] = b[idx+4]; 267 tp[5] = b[idx+5]; 268 tp[6] = b[idx+6]; 269 tp += 7; 270 } 271 272 for (k=0; k<mbs; k++){ 273 v = aa + 49*ai[k]; 274 vj = aj + ai[k]; 275 tp = t + k*7; 276 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 277 nz = ai[k+1] - ai[k]; 278 tp = t + (*vj)*7; 279 while (nz--) { 280 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 281 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 282 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 283 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 284 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 285 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 286 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 287 vj++; tp = t + (*vj)*7; 288 v += 49; 289 } 290 291 /* xk = inv(Dk)*(Dk*xk) */ 292 d = aa+k*49; /* ptr to inv(Dk) */ 293 tp = t + k*7; 294 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 295 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 296 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 297 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 298 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 299 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 300 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 301 } 302 303 /* solve U*x = y by back substitution */ 304 for (k=mbs-1; k>=0; k--){ 305 v = aa + 49*ai[k]; 306 vj = aj + ai[k]; 307 tp = t + k*7; 308 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 309 nz = ai[k+1] - ai[k]; 310 311 tp = t + (*vj)*7; 312 while (nz--) { 313 /* xk += U(k,:)*x(:) */ 314 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 315 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 316 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 317 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 318 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 319 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 320 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 321 vj++; tp = t + (*vj)*7; 322 v += 49; 323 } 324 tp = t + k*7; 325 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 326 idx = 7*r[k]; 327 x[idx] = x0; 328 x[idx+1] = x1; 329 x[idx+2] = x2; 330 x[idx+3] = x3; 331 x[idx+4] = x4; 332 x[idx+5] = x5; 333 x[idx+6] = x6; 334 } 335 336 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 337 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 338 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 339 ierr = PetscLogFlops(49.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 340 PetscFunctionReturn(0); 341 } 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 345 PetscErrorCode ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 346 { 347 MatScalar *v,*d; 348 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 349 PetscInt nz,*vj,k; 350 351 PetscFunctionBegin; 352 for (k=0; k<mbs; k++){ 353 v = aa + 49*ai[k]; 354 xp = x + k*7; 355 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 356 nz = ai[k+1] - ai[k]; 357 vj = aj + ai[k]; 358 xp = x + (*vj)*7; 359 while (nz--) { 360 /* x(:) += U(k,:)^T*(Dk*xk) */ 361 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 362 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 363 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 364 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 365 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 366 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 367 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 368 vj++; xp = x + (*vj)*7; 369 v += 49; 370 } 371 /* xk = inv(Dk)*(Dk*xk) */ 372 d = aa+k*49; /* ptr to inv(Dk) */ 373 xp = x + k*7; 374 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 375 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 376 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 377 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 378 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 379 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 380 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 381 } 382 PetscFunctionReturn(0); 383 } 384 385 #undef __FUNCT__ 386 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 387 PetscErrorCode BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 388 { 389 MatScalar *v; 390 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 391 PetscInt nz,*vj,k; 392 393 PetscFunctionBegin; 394 for (k=mbs-1; k>=0; k--){ 395 v = aa + 49*ai[k]; 396 xp = x + k*7; 397 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 398 nz = ai[k+1] - ai[k]; 399 vj = aj + ai[k]; 400 xp = x + (*vj)*7; 401 while (nz--) { 402 /* xk += U(k,:)*x(:) */ 403 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 404 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 405 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 406 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 407 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 408 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 409 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 410 vj++; 411 v += 49; xp = x + (*vj)*7; 412 } 413 xp = x + k*7; 414 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 415 } 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering" 421 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 422 { 423 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 424 PetscErrorCode ierr; 425 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 426 MatScalar *aa=a->a; 427 PetscScalar *x,*b; 428 429 PetscFunctionBegin; 430 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 431 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 432 433 /* solve U^T * D * y = b by forward substitution */ 434 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 435 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 436 437 /* solve U*x = y by back substitution */ 438 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 439 440 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 441 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 442 ierr = PetscLogFlops(49.0*(2*a->nz + mbs));CHKERRQ(ierr); 443 PetscFunctionReturn(0); 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering" 448 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 449 { 450 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 451 PetscErrorCode ierr; 452 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 453 MatScalar *aa=a->a; 454 PetscScalar *x,*b; 455 456 PetscFunctionBegin; 457 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 458 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 459 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 460 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 461 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 462 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 463 ierr = PetscLogFlops(49.0*a->nz + mbs);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 #undef __FUNCT__ 468 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering" 469 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 470 { 471 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 472 PetscErrorCode ierr; 473 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 474 MatScalar *aa=a->a; 475 PetscScalar *x,*b; 476 477 PetscFunctionBegin; 478 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 479 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 480 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 481 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 482 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 483 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 484 ierr = PetscLogFlops(49.0*a->nz);CHKERRQ(ierr); 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNCT__ 489 #define __FUNCT__ "MatSolve_SeqSBAIJ_6" 490 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 491 { 492 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 493 IS isrow=a->row; 494 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 495 PetscErrorCode ierr; 496 const PetscInt *r; 497 PetscInt nz,*vj,k,idx; 498 MatScalar *aa=a->a,*v,*d; 499 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 500 501 PetscFunctionBegin; 502 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 503 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 504 t = a->solve_work; 505 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 506 507 /* solve U^T * D * y = b by forward substitution */ 508 tp = t; 509 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 510 idx = 6*r[k]; 511 tp[0] = b[idx]; 512 tp[1] = b[idx+1]; 513 tp[2] = b[idx+2]; 514 tp[3] = b[idx+3]; 515 tp[4] = b[idx+4]; 516 tp[5] = b[idx+5]; 517 tp += 6; 518 } 519 520 for (k=0; k<mbs; k++){ 521 v = aa + 36*ai[k]; 522 vj = aj + ai[k]; 523 tp = t + k*6; 524 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 525 nz = ai[k+1] - ai[k]; 526 tp = t + (*vj)*6; 527 while (nz--) { 528 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 529 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 530 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 531 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 532 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 533 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 534 vj++; tp = t + (*vj)*6; 535 v += 36; 536 } 537 538 /* xk = inv(Dk)*(Dk*xk) */ 539 d = aa+k*36; /* ptr to inv(Dk) */ 540 tp = t + k*6; 541 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 542 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 543 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 544 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 545 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 546 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 547 } 548 549 /* solve U*x = y by back substitution */ 550 for (k=mbs-1; k>=0; k--){ 551 v = aa + 36*ai[k]; 552 vj = aj + ai[k]; 553 tp = t + k*6; 554 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 555 nz = ai[k+1] - ai[k]; 556 557 tp = t + (*vj)*6; 558 while (nz--) { 559 /* xk += U(k,:)*x(:) */ 560 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 561 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 562 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 563 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 564 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 565 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 566 vj++; tp = t + (*vj)*6; 567 v += 36; 568 } 569 tp = t + k*6; 570 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 571 idx = 6*r[k]; 572 x[idx] = x0; 573 x[idx+1] = x1; 574 x[idx+2] = x2; 575 x[idx+3] = x3; 576 x[idx+4] = x4; 577 x[idx+5] = x5; 578 } 579 580 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 581 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 582 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 583 ierr = PetscLogFlops(36.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 584 PetscFunctionReturn(0); 585 } 586 587 #undef __FUNCT__ 588 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 589 PetscErrorCode ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 590 { 591 MatScalar *v,*d; 592 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 593 PetscInt nz,*vj,k; 594 595 PetscFunctionBegin; 596 for (k=0; k<mbs; k++){ 597 v = aa + 36*ai[k]; 598 xp = x + k*6; 599 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 600 nz = ai[k+1] - ai[k]; 601 vj = aj + ai[k]; 602 xp = x + (*vj)*6; 603 while (nz--) { 604 /* x(:) += U(k,:)^T*(Dk*xk) */ 605 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 606 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 607 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 608 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 609 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 610 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 611 vj++; xp = x + (*vj)*6; 612 v += 36; 613 } 614 /* xk = inv(Dk)*(Dk*xk) */ 615 d = aa+k*36; /* ptr to inv(Dk) */ 616 xp = x + k*6; 617 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 618 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 619 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 620 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 621 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 622 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 623 } 624 PetscFunctionReturn(0); 625 } 626 #undef __FUNCT__ 627 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 628 PetscErrorCode BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 629 { 630 MatScalar *v; 631 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 632 PetscInt nz,*vj,k; 633 634 PetscFunctionBegin; 635 for (k=mbs-1; k>=0; k--){ 636 v = aa + 36*ai[k]; 637 xp = x + k*6; 638 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 639 nz = ai[k+1] - ai[k]; 640 vj = aj + ai[k]; 641 xp = x + (*vj)*6; 642 while (nz--) { 643 /* xk += U(k,:)*x(:) */ 644 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 645 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 646 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 647 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 648 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 649 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 650 vj++; 651 v += 36; xp = x + (*vj)*6; 652 } 653 xp = x + k*6; 654 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 655 } 656 PetscFunctionReturn(0); 657 } 658 659 660 #undef __FUNCT__ 661 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering" 662 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 663 { 664 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 665 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 666 MatScalar *aa=a->a; 667 PetscScalar *x,*b; 668 PetscErrorCode ierr; 669 670 PetscFunctionBegin; 671 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 672 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 673 674 /* solve U^T * D * y = b by forward substitution */ 675 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 676 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 677 678 /* solve U*x = y by back substitution */ 679 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 680 681 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 682 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 683 ierr = PetscLogFlops(36.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 684 PetscFunctionReturn(0); 685 } 686 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering" 689 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 690 { 691 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 692 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 693 MatScalar *aa=a->a; 694 PetscScalar *x,*b; 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 699 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 700 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 701 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 702 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 703 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 704 ierr = PetscLogFlops(36.0*a->nz + mbs);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering" 710 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 711 { 712 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 713 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 714 MatScalar *aa=a->a; 715 PetscScalar *x,*b; 716 PetscErrorCode ierr; 717 718 PetscFunctionBegin; 719 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 720 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 721 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 722 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 723 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 724 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 725 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatSolve_SeqSBAIJ_5" 731 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 732 { 733 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 734 IS isrow=a->row; 735 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 736 PetscErrorCode ierr; 737 const PetscInt *r; 738 PetscInt nz,*vj,k,idx; 739 MatScalar *aa=a->a,*v,*diag; 740 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 741 742 PetscFunctionBegin; 743 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 744 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 745 t = a->solve_work; 746 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 747 748 /* solve U^T * D * y = b by forward substitution */ 749 tp = t; 750 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 751 idx = 5*r[k]; 752 tp[0] = b[idx]; 753 tp[1] = b[idx+1]; 754 tp[2] = b[idx+2]; 755 tp[3] = b[idx+3]; 756 tp[4] = b[idx+4]; 757 tp += 5; 758 } 759 760 for (k=0; k<mbs; k++){ 761 v = aa + 25*ai[k]; 762 vj = aj + ai[k]; 763 tp = t + k*5; 764 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 765 nz = ai[k+1] - ai[k]; 766 767 tp = t + (*vj)*5; 768 while (nz--) { 769 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 770 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 771 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 772 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 773 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 774 vj++; tp = t + (*vj)*5; 775 v += 25; 776 } 777 778 /* xk = inv(Dk)*(Dk*xk) */ 779 diag = aa+k*25; /* ptr to inv(Dk) */ 780 tp = t + k*5; 781 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 782 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 783 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 784 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 785 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 786 } 787 788 /* solve U*x = y by back substitution */ 789 for (k=mbs-1; k>=0; k--){ 790 v = aa + 25*ai[k]; 791 vj = aj + ai[k]; 792 tp = t + k*5; 793 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 794 nz = ai[k+1] - ai[k]; 795 796 tp = t + (*vj)*5; 797 while (nz--) { 798 /* xk += U(k,:)*x(:) */ 799 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 800 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 801 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 802 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 803 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 804 vj++; tp = t + (*vj)*5; 805 v += 25; 806 } 807 tp = t + k*5; 808 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 809 idx = 5*r[k]; 810 x[idx] = x0; 811 x[idx+1] = x1; 812 x[idx+2] = x2; 813 x[idx+3] = x3; 814 x[idx+4] = x4; 815 } 816 817 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 818 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 819 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 820 ierr = PetscLogFlops(25.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 826 PetscErrorCode ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 827 { 828 MatScalar *v,*diag; 829 PetscScalar *xp,x0,x1,x2,x3,x4; 830 PetscInt nz,*vj,k; 831 832 PetscFunctionBegin; 833 for (k=0; k<mbs; k++){ 834 v = aa + 25*ai[k]; 835 xp = x + k*5; 836 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 837 nz = ai[k+1] - ai[k]; 838 vj = aj + ai[k]; 839 xp = x + (*vj)*5; 840 while (nz--) { 841 /* x(:) += U(k,:)^T*(Dk*xk) */ 842 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 843 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 844 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 845 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 846 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 847 vj++; xp = x + (*vj)*5; 848 v += 25; 849 } 850 /* xk = inv(Dk)*(Dk*xk) */ 851 diag = aa+k*25; /* ptr to inv(Dk) */ 852 xp = x + k*5; 853 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 854 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 855 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 856 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 857 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 858 } 859 PetscFunctionReturn(0); 860 } 861 862 #undef __FUNCT__ 863 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 864 PetscErrorCode BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 865 { 866 MatScalar *v; 867 PetscScalar *xp,x0,x1,x2,x3,x4; 868 PetscInt nz,*vj,k; 869 870 PetscFunctionBegin; 871 for (k=mbs-1; k>=0; k--){ 872 v = aa + 25*ai[k]; 873 xp = x + k*5; 874 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 875 nz = ai[k+1] - ai[k]; 876 vj = aj + ai[k]; 877 xp = x + (*vj)*5; 878 while (nz--) { 879 /* xk += U(k,:)*x(:) */ 880 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 881 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 882 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 883 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 884 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 885 vj++; 886 v += 25; xp = x + (*vj)*5; 887 } 888 xp = x + k*5; 889 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 890 } 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering" 896 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 897 { 898 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 899 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 900 MatScalar *aa=a->a; 901 PetscScalar *x,*b; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 906 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 907 908 /* solve U^T * D * y = b by forward substitution */ 909 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 910 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 911 912 /* solve U*x = y by back substitution */ 913 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 914 915 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 916 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 917 ierr = PetscLogFlops(25.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 918 PetscFunctionReturn(0); 919 } 920 921 #undef __FUNCT__ 922 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering" 923 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 924 { 925 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 926 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 927 MatScalar *aa=a->a; 928 PetscScalar *x,*b; 929 PetscErrorCode ierr; 930 931 PetscFunctionBegin; 932 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 933 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 934 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 935 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 936 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 937 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 938 ierr = PetscLogFlops(25.0*(a->nz + mbs));CHKERRQ(ierr); 939 PetscFunctionReturn(0); 940 } 941 942 #undef __FUNCT__ 943 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering" 944 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 945 { 946 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 947 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 948 MatScalar *aa=a->a; 949 PetscScalar *x,*b; 950 PetscErrorCode ierr; 951 952 PetscFunctionBegin; 953 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 954 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 955 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 956 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 957 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 958 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 959 ierr = PetscLogFlops(25.0*a->nz);CHKERRQ(ierr); 960 PetscFunctionReturn(0); 961 } 962 963 #undef __FUNCT__ 964 #define __FUNCT__ "MatSolve_SeqSBAIJ_4" 965 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 966 { 967 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 968 IS isrow=a->row; 969 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 970 PetscErrorCode ierr; 971 const PetscInt *r; 972 PetscInt nz,*vj,k,idx; 973 MatScalar *aa=a->a,*v,*diag; 974 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 975 976 PetscFunctionBegin; 977 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 978 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 979 t = a->solve_work; 980 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 981 982 /* solve U^T * D * y = b by forward substitution */ 983 tp = t; 984 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 985 idx = 4*r[k]; 986 tp[0] = b[idx]; 987 tp[1] = b[idx+1]; 988 tp[2] = b[idx+2]; 989 tp[3] = b[idx+3]; 990 tp += 4; 991 } 992 993 for (k=0; k<mbs; k++){ 994 v = aa + 16*ai[k]; 995 vj = aj + ai[k]; 996 tp = t + k*4; 997 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 998 nz = ai[k+1] - ai[k]; 999 1000 tp = t + (*vj)*4; 1001 while (nz--) { 1002 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 1003 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1004 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1005 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1006 vj++; tp = t + (*vj)*4; 1007 v += 16; 1008 } 1009 1010 /* xk = inv(Dk)*(Dk*xk) */ 1011 diag = aa+k*16; /* ptr to inv(Dk) */ 1012 tp = t + k*4; 1013 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1014 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1015 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1016 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1017 } 1018 1019 /* solve U*x = y by back substitution */ 1020 for (k=mbs-1; k>=0; k--){ 1021 v = aa + 16*ai[k]; 1022 vj = aj + ai[k]; 1023 tp = t + k*4; 1024 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 1025 nz = ai[k+1] - ai[k]; 1026 1027 tp = t + (*vj)*4; 1028 while (nz--) { 1029 /* xk += U(k,:)*x(:) */ 1030 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 1031 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 1032 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 1033 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 1034 vj++; tp = t + (*vj)*4; 1035 v += 16; 1036 } 1037 tp = t + k*4; 1038 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 1039 idx = 4*r[k]; 1040 x[idx] = x0; 1041 x[idx+1] = x1; 1042 x[idx+2] = x2; 1043 x[idx+3] = x3; 1044 } 1045 1046 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1047 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1048 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1049 ierr = PetscLogFlops(16.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1050 PetscFunctionReturn(0); 1051 } 1052 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1055 PetscErrorCode ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1056 { 1057 MatScalar *v,*diag; 1058 PetscScalar *xp,x0,x1,x2,x3; 1059 PetscInt nz,*vj,k; 1060 1061 PetscFunctionBegin; 1062 for (k=0; k<mbs; k++){ 1063 v = aa + 16*ai[k]; 1064 xp = x + k*4; 1065 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 1066 nz = ai[k+1] - ai[k]; 1067 vj = aj + ai[k]; 1068 xp = x + (*vj)*4; 1069 while (nz--) { 1070 /* x(:) += U(k,:)^T*(Dk*xk) */ 1071 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 1072 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1073 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1074 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1075 vj++; xp = x + (*vj)*4; 1076 v += 16; 1077 } 1078 /* xk = inv(Dk)*(Dk*xk) */ 1079 diag = aa+k*16; /* ptr to inv(Dk) */ 1080 xp = x + k*4; 1081 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1082 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1083 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1084 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1085 } 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1091 PetscErrorCode BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1092 { 1093 MatScalar *v; 1094 PetscScalar *xp,x0,x1,x2,x3; 1095 PetscInt nz,*vj,k; 1096 1097 PetscFunctionBegin; 1098 for (k=mbs-1; k>=0; k--){ 1099 v = aa + 16*ai[k]; 1100 xp = x + k*4; 1101 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 1102 nz = ai[k+1] - ai[k]; 1103 vj = aj + ai[k]; 1104 xp = x + (*vj)*4; 1105 while (nz--) { 1106 /* xk += U(k,:)*x(:) */ 1107 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 1108 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 1109 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 1110 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 1111 vj++; 1112 v += 16; xp = x + (*vj)*4; 1113 } 1114 xp = x + k*4; 1115 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 1116 } 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering" 1122 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1123 { 1124 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1125 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1126 MatScalar *aa=a->a; 1127 PetscScalar *x,*b; 1128 PetscErrorCode ierr; 1129 1130 PetscFunctionBegin; 1131 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1132 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1133 1134 /* solve U^T * D * y = b by forward substitution */ 1135 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1136 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1137 1138 /* solve U*x = y by back substitution */ 1139 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1140 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1141 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1142 ierr = PetscLogFlops(16.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering" 1148 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1149 { 1150 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1151 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1152 MatScalar *aa=a->a; 1153 PetscScalar *x,*b; 1154 PetscErrorCode ierr; 1155 1156 PetscFunctionBegin; 1157 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1158 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1159 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1160 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1161 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1162 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1163 ierr = PetscLogFlops(16.0*a->nz + mbs);CHKERRQ(ierr); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering" 1169 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1170 { 1171 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1172 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1173 MatScalar *aa=a->a; 1174 PetscScalar *x,*b; 1175 PetscErrorCode ierr; 1176 1177 PetscFunctionBegin; 1178 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1179 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1180 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1181 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1182 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1183 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1184 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatSolve_SeqSBAIJ_3" 1190 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 1191 { 1192 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1193 IS isrow=a->row; 1194 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1195 PetscErrorCode ierr; 1196 const PetscInt *r; 1197 PetscInt nz,*vj,k,idx; 1198 MatScalar *aa=a->a,*v,*diag; 1199 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 1200 1201 PetscFunctionBegin; 1202 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1203 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1204 t = a->solve_work; 1205 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1206 1207 /* solve U^T * D * y = b by forward substitution */ 1208 tp = t; 1209 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1210 idx = 3*r[k]; 1211 tp[0] = b[idx]; 1212 tp[1] = b[idx+1]; 1213 tp[2] = b[idx+2]; 1214 tp += 3; 1215 } 1216 1217 for (k=0; k<mbs; k++){ 1218 v = aa + 9*ai[k]; 1219 vj = aj + ai[k]; 1220 tp = t + k*3; 1221 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 1222 nz = ai[k+1] - ai[k]; 1223 1224 tp = t + (*vj)*3; 1225 while (nz--) { 1226 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1227 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1228 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1229 vj++; tp = t + (*vj)*3; 1230 v += 9; 1231 } 1232 1233 /* xk = inv(Dk)*(Dk*xk) */ 1234 diag = aa+k*9; /* ptr to inv(Dk) */ 1235 tp = t + k*3; 1236 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1237 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1238 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1239 } 1240 1241 /* solve U*x = y by back substitution */ 1242 for (k=mbs-1; k>=0; k--){ 1243 v = aa + 9*ai[k]; 1244 vj = aj + ai[k]; 1245 tp = t + k*3; 1246 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 1247 nz = ai[k+1] - ai[k]; 1248 1249 tp = t + (*vj)*3; 1250 while (nz--) { 1251 /* xk += U(k,:)*x(:) */ 1252 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 1253 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 1254 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 1255 vj++; tp = t + (*vj)*3; 1256 v += 9; 1257 } 1258 tp = t + k*3; 1259 tp[0] = x0; tp[1] = x1; tp[2] = x2; 1260 idx = 3*r[k]; 1261 x[idx] = x0; 1262 x[idx+1] = x1; 1263 x[idx+2] = x2; 1264 } 1265 1266 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1267 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1268 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1269 ierr = PetscLogFlops(9.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1270 PetscFunctionReturn(0); 1271 } 1272 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1275 PetscErrorCode ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1276 { 1277 MatScalar *v,*diag; 1278 PetscScalar *xp,x0,x1,x2; 1279 PetscInt nz,*vj,k; 1280 1281 PetscFunctionBegin; 1282 for (k=0; k<mbs; k++){ 1283 v = aa + 9*ai[k]; 1284 xp = x + k*3; 1285 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 1286 nz = ai[k+1] - ai[k]; 1287 vj = aj + ai[k]; 1288 xp = x + (*vj)*3; 1289 while (nz--) { 1290 /* x(:) += U(k,:)^T*(Dk*xk) */ 1291 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1292 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1293 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1294 vj++; xp = x + (*vj)*3; 1295 v += 9; 1296 } 1297 /* xk = inv(Dk)*(Dk*xk) */ 1298 diag = aa+k*9; /* ptr to inv(Dk) */ 1299 xp = x + k*3; 1300 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1301 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1302 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1303 } 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1309 PetscErrorCode BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1310 { 1311 MatScalar *v; 1312 PetscScalar *xp,x0,x1,x2; 1313 PetscInt nz,*vj,k; 1314 1315 PetscFunctionBegin; 1316 for (k=mbs-1; k>=0; k--){ 1317 v = aa + 9*ai[k]; 1318 xp = x + k*3; 1319 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 1320 nz = ai[k+1] - ai[k]; 1321 vj = aj + ai[k]; 1322 xp = x + (*vj)*3; 1323 while (nz--) { 1324 /* xk += U(k,:)*x(:) */ 1325 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 1326 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 1327 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 1328 vj++; 1329 v += 9; xp = x + (*vj)*3; 1330 } 1331 xp = x + k*3; 1332 xp[0] = x0; xp[1] = x1; xp[2] = x2; 1333 } 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering" 1339 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1340 { 1341 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1342 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1343 MatScalar *aa=a->a; 1344 PetscScalar *x,*b; 1345 PetscErrorCode ierr; 1346 1347 PetscFunctionBegin; 1348 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1349 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1350 1351 /* solve U^T * D * y = b by forward substitution */ 1352 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1353 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1354 1355 /* solve U*x = y by back substitution */ 1356 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1357 1358 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1359 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1360 ierr = PetscLogFlops(9.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering" 1366 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1367 { 1368 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1369 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1370 MatScalar *aa=a->a; 1371 PetscScalar *x,*b; 1372 PetscErrorCode ierr; 1373 1374 PetscFunctionBegin; 1375 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1376 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1377 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1378 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1379 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1380 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1381 ierr = PetscLogFlops(9.0*(a->nz + mbs));CHKERRQ(ierr); 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering" 1387 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1388 { 1389 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1390 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1391 MatScalar *aa=a->a; 1392 PetscScalar *x,*b; 1393 PetscErrorCode ierr; 1394 1395 PetscFunctionBegin; 1396 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1397 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1398 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1399 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1400 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1401 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1402 ierr = PetscLogFlops(9.0*a->nz);CHKERRQ(ierr); 1403 PetscFunctionReturn(0); 1404 } 1405 1406 #undef __FUNCT__ 1407 #define __FUNCT__ "MatSolve_SeqSBAIJ_2" 1408 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 1409 { 1410 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 1411 IS isrow=a->row; 1412 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1413 PetscErrorCode ierr; 1414 const PetscInt *r; 1415 PetscInt nz,*vj,k,k2,idx; 1416 MatScalar *aa=a->a,*v,*diag; 1417 PetscScalar *x,*b,x0,x1,*t; 1418 1419 PetscFunctionBegin; 1420 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1421 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1422 t = a->solve_work; 1423 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1424 1425 /* solve U^T * D * y = perm(b) by forward substitution */ 1426 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1427 idx = 2*r[k]; 1428 t[k*2] = b[idx]; 1429 t[k*2+1] = b[idx+1]; 1430 } 1431 for (k=0; k<mbs; k++){ 1432 v = aa + 4*ai[k]; 1433 vj = aj + ai[k]; 1434 k2 = k*2; 1435 x0 = t[k2]; x1 = t[k2+1]; 1436 nz = ai[k+1] - ai[k]; 1437 while (nz--) { 1438 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1439 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1440 vj++; v += 4; 1441 } 1442 diag = aa+k*4; /* ptr to inv(Dk) */ 1443 t[k2] = diag[0]*x0 + diag[2]*x1; 1444 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1445 } 1446 1447 /* solve U*x = y by back substitution */ 1448 for (k=mbs-1; k>=0; k--){ 1449 v = aa + 4*ai[k]; 1450 vj = aj + ai[k]; 1451 k2 = k*2; 1452 x0 = t[k2]; x1 = t[k2+1]; 1453 nz = ai[k+1] - ai[k]; 1454 while (nz--) { 1455 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1456 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1457 vj++; v += 4; 1458 } 1459 t[k2] = x0; 1460 t[k2+1] = x1; 1461 idx = 2*r[k]; 1462 x[idx] = x0; 1463 x[idx+1] = x1; 1464 } 1465 1466 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1467 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1468 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1469 ierr = PetscLogFlops(4.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1475 PetscErrorCode ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1476 { 1477 MatScalar *v,*diag; 1478 PetscScalar x0,x1; 1479 PetscInt nz,*vj,k,k2; 1480 1481 PetscFunctionBegin; 1482 for (k=0; k<mbs; k++){ 1483 v = aa + 4*ai[k]; 1484 vj = aj + ai[k]; 1485 k2 = k*2; 1486 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1487 nz = ai[k+1] - ai[k]; 1488 1489 while (nz--) { 1490 /* x(:) += U(k,:)^T*(Dk*xk) */ 1491 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1492 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1493 vj++; v += 4; 1494 } 1495 /* xk = inv(Dk)*(Dk*xk) */ 1496 diag = aa+k*4; /* ptr to inv(Dk) */ 1497 x[k2] = diag[0]*x0 + diag[2]*x1; 1498 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1499 } 1500 PetscFunctionReturn(0); 1501 } 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1505 PetscErrorCode BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1506 { 1507 MatScalar *v; 1508 PetscScalar x0,x1; 1509 PetscInt nz,*vj,k,k2; 1510 1511 PetscFunctionBegin; 1512 for (k=mbs-1; k>=0; k--){ 1513 v = aa + 4*ai[k]; 1514 vj = aj + ai[k]; 1515 k2 = k*2; 1516 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1517 nz = ai[k+1] - ai[k]; 1518 while (nz--) { 1519 /* xk += U(k,:)*x(:) */ 1520 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1521 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1522 vj++; v += 4; 1523 } 1524 x[k2] = x0; 1525 x[k2+1] = x1; 1526 } 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering" 1532 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1533 { 1534 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1535 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1536 MatScalar *aa=a->a; 1537 PetscScalar *x,*b; 1538 PetscErrorCode ierr; 1539 1540 PetscFunctionBegin; 1541 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1542 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1543 1544 /* solve U^T * D * y = b by forward substitution */ 1545 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1546 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1547 1548 /* solve U*x = y by back substitution */ 1549 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1550 1551 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1552 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1553 ierr = PetscLogFlops(4.0*(2.0*a->nz + mbs));CHKERRQ(ierr); /* bs2*(2*a->nz + mbs) */ 1554 PetscFunctionReturn(0); 1555 } 1556 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering" 1559 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1560 { 1561 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1562 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1563 MatScalar *aa=a->a; 1564 PetscScalar *x,*b; 1565 PetscErrorCode ierr; 1566 1567 PetscFunctionBegin; 1568 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1569 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1570 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1571 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1572 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1573 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1574 ierr = PetscLogFlops(4.0*(a->nz + mbs));CHKERRQ(ierr); 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering" 1580 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1581 { 1582 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1583 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1584 MatScalar *aa=a->a; 1585 PetscScalar *x,*b; 1586 PetscErrorCode ierr; 1587 1588 PetscFunctionBegin; 1589 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1590 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1591 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1592 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1593 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1594 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1595 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 1596 PetscFunctionReturn(0); 1597 } 1598 1599 #undef __FUNCT__ 1600 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_newdatastruct" 1601 PetscErrorCode MatSolve_SeqSBAIJ_1_newdatastruct(Mat A,Vec bb,Vec xx) 1602 { 1603 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1604 IS isrow=a->row; 1605 PetscErrorCode ierr; 1606 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1607 const MatScalar *aa=a->a,*v; 1608 const PetscScalar *b; 1609 PetscScalar *x,xk,*t; 1610 PetscInt nz,k,j; 1611 1612 PetscFunctionBegin; 1613 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1614 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1615 t = a->solve_work; 1616 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1617 1618 /* solve U^T*D^(1/2)*y = perm(b) by forward substitution */ 1619 for (k=0; k<mbs; k++) t[k] = b[rp[k]]; 1620 for (k=0; k<mbs; k++){ 1621 v = aa + ai[k]; 1622 vj = aj + ai[k]; 1623 xk = t[k]; 1624 nz = ai[k+1] - ai[k] - 1; 1625 for (j=0; j<nz; j++) t[*vj++] += (*v++) * xk; 1626 t[k] = xk*(*v++); /* *v = 1/D(k) */ 1627 } 1628 1629 /* solve U*perm(x) = y by back substitution */ 1630 for (k=mbs-1; k>=0; k--){ 1631 v = aa + ai[k]; 1632 vj = aj + ai[k]; 1633 nz = ai[k+1] - ai[k] - 1; 1634 for (j=0; j<nz; j++) t[k] += (*v++) * t[*vj++]; 1635 x[rp[k]] = t[k]; 1636 } 1637 1638 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1639 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1640 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1641 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 1642 PetscFunctionReturn(0); 1643 } 1644 1645 #undef __FUNCT__ 1646 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1647 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1648 { 1649 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1650 IS isrow=a->row; 1651 PetscErrorCode ierr; 1652 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1653 const MatScalar *aa=a->a,*v; 1654 PetscScalar *x,*b,xk,*t; 1655 PetscInt nz,k; 1656 1657 PetscFunctionBegin; 1658 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1659 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1660 t = a->solve_work; 1661 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1662 1663 /* solve U^T*D^(1/2)*y = perm(b) by forward substitution */ 1664 for (k=0; k<mbs; k++) t[k] = b[rp[k]]; 1665 for (k=0; k<mbs; k++){ 1666 v = aa + ai[k] + 1; 1667 vj = aj + ai[k] + 1; 1668 xk = t[k]; 1669 nz = ai[k+1] - ai[k] - 1; 1670 while (nz--) t[*vj++] += (*v++) * xk; 1671 t[k] = xk*aa[ai[k]]; /* aa[k] = 1/D(k) */ 1672 } 1673 1674 /* solve U*perm(x) = y by back substitution */ 1675 for (k=mbs-1; k>=0; k--){ 1676 v = aa + ai[k] + 1; 1677 vj = aj + ai[k] + 1; 1678 nz = ai[k+1] - ai[k] - 1; 1679 while (nz--) t[k] += (*v++) * t[*vj++]; 1680 x[rp[k]] = t[k]; 1681 } 1682 1683 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1684 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1685 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1686 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 1687 PetscFunctionReturn(0); 1688 } 1689 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1" 1692 PetscErrorCode MatForwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1693 { 1694 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1695 IS isrow=a->row; 1696 PetscErrorCode ierr; 1697 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1698 const MatScalar *aa=a->a,*v; 1699 PetscReal diagk; 1700 PetscScalar *x,*b,xk; 1701 PetscInt nz,k; 1702 1703 PetscFunctionBegin; 1704 /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */ 1705 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1706 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1707 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1708 1709 for (k=0; k<mbs; k++) x[k] = b[rp[k]]; 1710 for (k=0; k<mbs; k++){ 1711 v = aa + ai[k] + 1; 1712 vj = aj + ai[k] + 1; 1713 xk = x[k]; 1714 nz = ai[k+1] - ai[k] - 1; 1715 while (nz--) x[*vj++] += (*v++) * xk; 1716 1717 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1718 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1719 x[k] = xk*sqrt(diagk); 1720 } 1721 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1722 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1723 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1724 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1725 PetscFunctionReturn(0); 1726 } 1727 1728 #undef __FUNCT__ 1729 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1" 1730 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1731 { 1732 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1733 IS isrow=a->row; 1734 PetscErrorCode ierr; 1735 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1736 const MatScalar *aa=a->a,*v; 1737 PetscReal diagk; 1738 PetscScalar *x,*b,*t; 1739 PetscInt nz,k; 1740 1741 PetscFunctionBegin; 1742 /* solve D^(1/2)*U*perm(x) = b by back substitution */ 1743 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1744 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1745 t = a->solve_work; 1746 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1747 1748 for (k=mbs-1; k>=0; k--){ 1749 v = aa + ai[k] + 1; 1750 vj = aj + ai[k] + 1; 1751 diagk = PetscRealPart(aa[ai[k]]); 1752 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1753 t[k] = b[k] * sqrt(diagk); 1754 nz = ai[k+1] - ai[k] - 1; 1755 while (nz--) t[k] += (*v++) * t[*vj++]; 1756 x[rp[k]] = t[k]; 1757 } 1758 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1759 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1760 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1761 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1762 PetscFunctionReturn(0); 1763 } 1764 1765 #undef __FUNCT__ 1766 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1767 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1768 { 1769 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1770 PetscErrorCode ierr; 1771 1772 PetscFunctionBegin; 1773 if (A->rmap->bs == 1) { 1774 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1775 } else { 1776 IS isrow=a->row; 1777 const PetscInt *vj,mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; 1778 const MatScalar *aa=a->a,*v; 1779 PetscScalar *x,*b,*t; 1780 PetscInt nz,k,n,i; 1781 if (bb->n > a->solves_work_n) { 1782 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 1783 ierr = PetscMalloc(bb->n*A->rmap->N*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1784 a->solves_work_n = bb->n; 1785 } 1786 n = bb->n; 1787 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1788 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1789 t = a->solves_work; 1790 1791 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1792 1793 /* solve U^T*D*y = perm(b) by forward substitution */ 1794 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs];} /* values are stored interlaced in t */ 1795 for (k=0; k<mbs; k++){ 1796 v = aa + ai[k]; 1797 vj = aj + ai[k]; 1798 nz = ai[k+1] - ai[k]; 1799 while (nz--) { 1800 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1801 v++;vj++; 1802 } 1803 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1804 } 1805 1806 /* solve U*perm(x) = y by back substitution */ 1807 for (k=mbs-1; k>=0; k--){ 1808 v = aa + ai[k]; 1809 vj = aj + ai[k]; 1810 nz = ai[k+1] - ai[k]; 1811 while (nz--) { 1812 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1813 v++;vj++; 1814 } 1815 for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i]; 1816 } 1817 1818 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1819 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1820 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1821 ierr = PetscLogFlops(bb->n*(4.0*a->nz + A->rmap->N));CHKERRQ(ierr); 1822 } 1823 PetscFunctionReturn(0); 1824 } 1825 1826 /* 1827 Special case where the matrix was ILU(0) factored in the natural 1828 ordering. This eliminates the need for the column and row permutation. 1829 */ 1830 #undef __FUNCT__ 1831 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering_newdatastruct" 1832 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering_newdatastruct(Mat A,Vec bb,Vec xx) 1833 { 1834 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1835 PetscErrorCode ierr; 1836 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*vj; 1837 const MatScalar *aa=a->a,*v; 1838 const PetscScalar *b; 1839 PetscScalar *x,xi; 1840 PetscInt nz,i,k,j; 1841 1842 PetscFunctionBegin; 1843 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1844 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1845 1846 /* solve U^T*D*y = b by forward substitution */ 1847 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1848 for (i=0; i<mbs; i++){ 1849 v = aa + ai[i]; 1850 vj = aj + ai[i]; 1851 xi = x[i]; 1852 nz = ai[i+1] - ai[i] - 1; /* exclude diag[i] */ 1853 for (j=0; j<nz; j++){ 1854 x[vj[j]] += v[j]* xi; 1855 } 1856 x[i] = xi*v[nz]; /* v[nz] = aa[diag[i]] = 1/D(i) */ 1857 } 1858 1859 /* solve U*x = y by back substitution */ 1860 k = ai[mbs-1]-2; 1861 for (i=mbs-2; i>=0; i--){ 1862 xi = x[i]; 1863 nz = ai[i+1] - ai[i] - 1; 1864 for (j=0; j<nz; j++){ 1865 xi += aa[k]* x[aj[k]]; k--; 1866 } 1867 x[i] = xi; 1868 k--; /* skip diagonal[i] */ 1869 } 1870 1871 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1872 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1873 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 1874 PetscFunctionReturn(0); 1875 } 1876 1877 #undef __FUNCT__ 1878 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1879 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1880 { 1881 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1882 PetscErrorCode ierr; 1883 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1884 MatScalar *aa=a->a,*v; 1885 PetscScalar *x,*b,xk; 1886 PetscInt nz,*vj,k; 1887 1888 PetscFunctionBegin; 1889 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1890 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1891 1892 /* solve U^T*D*y = b by forward substitution */ 1893 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1894 for (k=0; k<mbs; k++){ 1895 v = aa + ai[k] + 1; 1896 vj = aj + ai[k] + 1; 1897 xk = x[k]; 1898 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1899 while (nz--) x[*vj++] += (*v++) * xk; 1900 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1901 } 1902 1903 /* solve U*x = y by back substitution */ 1904 for (k=mbs-2; k>=0; k--){ 1905 v = aa + ai[k] + 1; 1906 vj = aj + ai[k] + 1; 1907 xk = x[k]; 1908 nz = ai[k+1] - ai[k] - 1; 1909 while (nz--) { 1910 xk += (*v++) * x[*vj++]; 1911 } 1912 x[k] = xk; 1913 } 1914 1915 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1916 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1917 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering" 1923 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1924 { 1925 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1926 PetscErrorCode ierr; 1927 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1928 MatScalar *aa=a->a,*v; 1929 PetscReal diagk; 1930 PetscScalar *x,*b; 1931 PetscInt nz,*vj,k; 1932 1933 PetscFunctionBegin; 1934 /* solve U^T*D^(1/2)*x = b by forward substitution */ 1935 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1936 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1937 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1938 for (k=0; k<mbs; k++){ 1939 v = aa + ai[k] + 1; 1940 vj = aj + ai[k] + 1; 1941 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1942 while (nz--) x[*vj++] += (*v++) * x[k]; 1943 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1944 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ2(PETSC_ERR_SUP,"Diagonal (%g,%g) must be real and nonnegative",PetscRealPart(aa[ai[k]]),PetscImaginaryPart(aa[ai[k]])); 1945 x[k] *= sqrt(diagk); 1946 } 1947 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1948 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1949 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1950 PetscFunctionReturn(0); 1951 } 1952 1953 #undef __FUNCT__ 1954 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering" 1955 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1956 { 1957 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1958 PetscErrorCode ierr; 1959 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1960 MatScalar *aa=a->a,*v; 1961 PetscReal diagk; 1962 PetscScalar *x,*b; 1963 PetscInt nz,*vj,k; 1964 1965 PetscFunctionBegin; 1966 /* solve D^(1/2)*U*x = b by back substitution */ 1967 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1968 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1969 1970 for (k=mbs-1; k>=0; k--){ 1971 v = aa + ai[k] + 1; 1972 vj = aj + ai[k] + 1; 1973 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1974 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1975 x[k] = sqrt(diagk)*b[k]; 1976 nz = ai[k+1] - ai[k] - 1; 1977 while (nz--) x[k] += (*v++) * x[*vj++]; 1978 } 1979 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1980 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1981 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1982 PetscFunctionReturn(0); 1983 } 1984 1985 extern PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat,PetscTruth); 1986 1987 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 1988 #undef __FUNCT__ 1989 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR" 1990 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat B,Mat A,IS perm,const MatFactorInfo *info) 1991 { 1992 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 1993 PetscErrorCode ierr; 1994 const PetscInt *rip,mbs = a->mbs,*ai = a->i,*aj = a->j; 1995 PetscInt *jutmp,bs = A->rmap->bs,bs2=a->bs2,i; 1996 PetscInt m,reallocs = 0,*levtmp; 1997 PetscInt *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 1998 PetscInt incrlev,*lev,shift,prow,nz; 1999 PetscReal f = info->fill,levels = info->levels; 2000 PetscTruth perm_identity; 2001 2002 PetscFunctionBegin; 2003 /* check whether perm is the identity mapping */ 2004 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2005 2006 if (perm_identity){ 2007 a->permute = PETSC_FALSE; 2008 ai = a->i; aj = a->j; 2009 } else { /* non-trivial permutation */ 2010 a->permute = PETSC_TRUE; 2011 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 2012 ai = a->inew; aj = a->jnew; 2013 } 2014 2015 /* initialization */ 2016 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2017 umax = (PetscInt)(f*ai[mbs] + 1); 2018 ierr = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr); 2019 umax += mbs + 1; 2020 shift = mbs + 1; 2021 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 2022 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 2023 iu[0] = mbs + 1; 2024 juidx = mbs + 1; 2025 /* prowl: linked list for pivot row */ 2026 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);CHKERRQ(ierr); 2027 /* q: linked list for col index */ 2028 q = prowl + mbs; 2029 levtmp = q + mbs; 2030 2031 for (i=0; i<mbs; i++){ 2032 prowl[i] = mbs; 2033 q[i] = 0; 2034 } 2035 2036 /* for each row k */ 2037 for (k=0; k<mbs; k++){ 2038 nzk = 0; 2039 q[k] = mbs; 2040 /* copy current row into linked list */ 2041 nz = ai[rip[k]+1] - ai[rip[k]]; 2042 j = ai[rip[k]]; 2043 while (nz--){ 2044 vj = rip[aj[j++]]; 2045 if (vj > k){ 2046 qm = k; 2047 do { 2048 m = qm; qm = q[m]; 2049 } while(qm < vj); 2050 if (qm == vj) { 2051 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 2052 } 2053 nzk++; 2054 q[m] = vj; 2055 q[vj] = qm; 2056 levtmp[vj] = 0; /* initialize lev for nonzero element */ 2057 } 2058 } 2059 2060 /* modify nonzero structure of k-th row by computing fill-in 2061 for each row prow to be merged in */ 2062 prow = k; 2063 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 2064 2065 while (prow < k){ 2066 /* merge row prow into k-th row */ 2067 jmin = iu[prow] + 1; 2068 jmax = iu[prow+1]; 2069 qm = k; 2070 for (j=jmin; j<jmax; j++){ 2071 incrlev = lev[j-shift] + 1; 2072 if (incrlev > levels) continue; 2073 vj = ju[j]; 2074 do { 2075 m = qm; qm = q[m]; 2076 } while (qm < vj); 2077 if (qm != vj){ /* a fill */ 2078 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 2079 levtmp[vj] = incrlev; 2080 } else { 2081 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 2082 } 2083 } 2084 prow = prowl[prow]; /* next pivot row */ 2085 } 2086 2087 /* add k to row list for first nonzero element in k-th row */ 2088 if (nzk > 1){ 2089 i = q[k]; /* col value of first nonzero element in k_th row of U */ 2090 prowl[k] = prowl[i]; prowl[i] = k; 2091 } 2092 iu[k+1] = iu[k] + nzk; 2093 2094 /* allocate more space to ju and lev if needed */ 2095 if (iu[k+1] > umax) { 2096 /* estimate how much additional space we will need */ 2097 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 2098 /* just double the memory each time */ 2099 maxadd = umax; 2100 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 2101 umax += maxadd; 2102 2103 /* allocate a longer ju */ 2104 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 2105 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 2106 ierr = PetscFree(ju);CHKERRQ(ierr); 2107 ju = jutmp; 2108 2109 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 2110 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr); 2111 ierr = PetscFree(lev);CHKERRQ(ierr); 2112 lev = jutmp; 2113 reallocs += 2; /* count how many times we realloc */ 2114 } 2115 2116 /* save nonzero structure of k-th row in ju */ 2117 i=k; 2118 while (nzk --) { 2119 i = q[i]; 2120 ju[juidx] = i; 2121 lev[juidx-shift] = levtmp[i]; 2122 juidx++; 2123 } 2124 } 2125 2126 #if defined(PETSC_USE_INFO) 2127 if (ai[mbs] != 0) { 2128 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2129 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2130 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2131 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 2132 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2133 } else { 2134 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2135 } 2136 #endif 2137 2138 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2139 ierr = PetscFree(prowl);CHKERRQ(ierr); 2140 ierr = PetscFree(lev);CHKERRQ(ierr); 2141 2142 /* put together the new matrix */ 2143 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,PETSC_NULL);CHKERRQ(ierr); 2144 2145 /* ierr = PetscLogObjectParent(B,iperm);CHKERRQ(ierr); */ 2146 b = (Mat_SeqSBAIJ*)(B)->data; 2147 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2148 b->singlemalloc = PETSC_FALSE; 2149 b->free_a = PETSC_TRUE; 2150 b->free_ij = PETSC_TRUE; 2151 /* the next line frees the default space generated by the Create() */ 2152 ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr); 2153 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 2154 b->j = ju; 2155 b->i = iu; 2156 b->diag = 0; 2157 b->ilen = 0; 2158 b->imax = 0; 2159 2160 if (b->row) { 2161 ierr = ISDestroy(b->row);CHKERRQ(ierr); 2162 } 2163 if (b->icol) { 2164 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 2165 } 2166 b->row = perm; 2167 b->icol = perm; 2168 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2169 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2170 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2171 /* In b structure: Free imax, ilen, old a, old j. 2172 Allocate idnew, solve_work, new a, new j */ 2173 ierr = PetscLogObjectMemory(B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2174 b->maxnz = b->nz = iu[mbs]; 2175 2176 (B)->info.factor_mallocs = reallocs; 2177 (B)->info.fill_ratio_given = f; 2178 if (ai[mbs] != 0) { 2179 (B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2180 } else { 2181 (B)->info.fill_ratio_needed = 0.0; 2182 } 2183 ierr = MatSeqSBAIJSetNumericFactorization(B,perm_identity);CHKERRQ(ierr); 2184 PetscFunctionReturn(0); 2185 } 2186 2187 #include "petscbt.h" 2188 #include "../src/mat/utils/freespace.h" 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 2191 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2192 { 2193 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2194 Mat_SeqSBAIJ *b; 2195 PetscErrorCode ierr; 2196 PetscTruth perm_identity,free_ij = PETSC_TRUE,missing; 2197 PetscInt bs=A->rmap->bs,am=a->mbs,d; 2198 const PetscInt *cols,*rip,*ai,*aj; 2199 PetscInt reallocs=0,i,*ui; 2200 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2201 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 2202 PetscReal fill=info->fill,levels=info->levels,ratio_needed; 2203 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2204 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2205 PetscBT lnkbt; 2206 2207 PetscFunctionBegin; 2208 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2209 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2210 2211 /* 2212 This code originally uses Modified Sparse Row (MSR) storage 2213 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice! 2214 Then it is rewritten so the factor B takes seqsbaij format. However the associated 2215 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1, 2216 thus the original code in MSR format is still used for these cases. 2217 The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever 2218 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 2219 */ 2220 if (bs > 1){ 2221 ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info);CHKERRQ(ierr); 2222 PetscFunctionReturn(0); 2223 } 2224 2225 /* check whether perm is the identity mapping */ 2226 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2227 2228 /* special case that simply copies fill pattern */ 2229 if (!levels && perm_identity) { 2230 a->permute = PETSC_FALSE; 2231 /* reuse the column pointers and row offsets for memory savings */ 2232 ui = a->i; 2233 uj = a->j; 2234 free_ij = PETSC_FALSE; 2235 ratio_needed = 1.0; 2236 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2237 if (perm_identity){ 2238 a->permute = PETSC_FALSE; 2239 ai = a->i; aj = a->j; 2240 } else { /* non-trivial permutation */ 2241 a->permute = PETSC_TRUE; 2242 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 2243 ai = a->inew; aj = a->jnew; 2244 } 2245 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2246 2247 /* initialization */ 2248 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2249 ui[0] = 0; 2250 2251 /* jl: linked list for storing indices of the pivot rows 2252 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2253 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 2254 il = jl + am; 2255 uj_ptr = (PetscInt**)(il + am); 2256 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 2257 for (i=0; i<am; i++){ 2258 jl[i] = am; il[i] = 0; 2259 } 2260 2261 /* create and initialize a linked list for storing column indices of the active row k */ 2262 nlnk = am + 1; 2263 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2264 2265 /* initial FreeSpace size is fill*(ai[am]+1) */ 2266 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2267 current_space = free_space; 2268 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2269 current_space_lvl = free_space_lvl; 2270 2271 for (k=0; k<am; k++){ /* for each active row k */ 2272 /* initialize lnk by the column indices of row rip[k] */ 2273 nzk = 0; 2274 ncols = ai[rip[k]+1] - ai[rip[k]]; 2275 cols = aj+ai[rip[k]]; 2276 ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2277 nzk += nlnk; 2278 2279 /* update lnk by computing fill-in for each pivot row to be merged in */ 2280 prow = jl[k]; /* 1st pivot row */ 2281 2282 while (prow < k){ 2283 nextprow = jl[prow]; 2284 2285 /* merge prow into k-th row */ 2286 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2287 jmax = ui[prow+1]; 2288 ncols = jmax-jmin; 2289 i = jmin - ui[prow]; 2290 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2291 j = *(uj_lvl_ptr[prow] + i - 1); 2292 cols_lvl = uj_lvl_ptr[prow]+i; 2293 ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2294 nzk += nlnk; 2295 2296 /* update il and jl for prow */ 2297 if (jmin < jmax){ 2298 il[prow] = jmin; 2299 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2300 } 2301 prow = nextprow; 2302 } 2303 2304 /* if free space is not available, make more free space */ 2305 if (current_space->local_remaining<nzk) { 2306 i = am - k + 1; /* num of unfactored rows */ 2307 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2308 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2309 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2310 reallocs++; 2311 } 2312 2313 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2314 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2315 2316 /* add the k-th row into il and jl */ 2317 if (nzk-1 > 0){ 2318 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2319 jl[k] = jl[i]; jl[i] = k; 2320 il[k] = ui[k] + 1; 2321 } 2322 uj_ptr[k] = current_space->array; 2323 uj_lvl_ptr[k] = current_space_lvl->array; 2324 2325 current_space->array += nzk; 2326 current_space->local_used += nzk; 2327 current_space->local_remaining -= nzk; 2328 current_space_lvl->array += nzk; 2329 current_space_lvl->local_used += nzk; 2330 current_space_lvl->local_remaining -= nzk; 2331 2332 ui[k+1] = ui[k] + nzk; 2333 } 2334 2335 #if defined(PETSC_USE_INFO) 2336 if (ai[am] != 0) { 2337 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2338 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2339 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2340 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2341 } else { 2342 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2343 } 2344 #endif 2345 2346 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2347 ierr = PetscFree(jl);CHKERRQ(ierr); 2348 2349 /* destroy list of free space and other temporary array(s) */ 2350 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2351 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2352 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2353 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2354 if (ai[am] != 0) { 2355 ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2356 } else { 2357 ratio_needed = 0.0; 2358 } 2359 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2360 2361 /* put together the new matrix in MATSEQSBAIJ format */ 2362 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2363 2364 b = (Mat_SeqSBAIJ*)(fact)->data; 2365 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2366 b->singlemalloc = PETSC_FALSE; 2367 b->free_a = PETSC_TRUE; 2368 b->free_ij = free_ij; 2369 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2370 b->j = uj; 2371 b->i = ui; 2372 b->diag = 0; 2373 b->ilen = 0; 2374 b->imax = 0; 2375 b->row = perm; 2376 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2377 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2378 b->icol = perm; 2379 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2380 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2381 b->maxnz = b->nz = ui[am]; 2382 2383 (fact)->info.factor_mallocs = reallocs; 2384 (fact)->info.fill_ratio_given = fill; 2385 (fact)->info.fill_ratio_needed = ratio_needed; 2386 ierr = MatSeqSBAIJSetNumericFactorization(fact,perm_identity);CHKERRQ(ierr); 2387 PetscFunctionReturn(0); 2388 } 2389 2390