1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for SBAIJ format. 5 */ 6 7 #include "../src/mat/impls/sbaij/seq/sbaij.h" 8 #include "../src/mat/impls/baij/seq/baij.h" 9 #include "../src/mat/blockinvert.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSolve_SeqSBAIJ_N" 13 PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 14 { 15 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 16 IS isrow=a->row; 17 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 18 PetscErrorCode ierr; 19 const PetscInt *r; 20 PetscInt nz,*vj,k,idx,k1; 21 PetscInt bs=A->rmap->bs,bs2 = a->bs2; 22 MatScalar *aa=a->a,*v,*diag; 23 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 24 25 PetscFunctionBegin; 26 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 27 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 28 t = a->solve_work; 29 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 30 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 31 32 /* solve U^T * D * y = b by forward substitution */ 33 xk = t; 34 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 35 idx = bs*r[k]; 36 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 37 } 38 for (k=0; k<mbs; k++){ 39 v = aa + bs2*ai[k]; 40 xk = t + k*bs; /* Dk*xk = k-th block of x */ 41 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 42 nz = ai[k+1] - ai[k]; 43 vj = aj + ai[k]; 44 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 45 while (nz--) { 46 /* x(:) += U(k,:)^T*(Dk*xk) */ 47 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 48 vj++; xj = t + (*vj)*bs; 49 v += bs2; 50 } 51 /* xk = inv(Dk)*(Dk*xk) */ 52 diag = aa+k*bs2; /* ptr to inv(Dk) */ 53 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 54 } 55 56 /* solve U*x = y by back substitution */ 57 for (k=mbs-1; k>=0; k--){ 58 v = aa + bs2*ai[k]; 59 xk = t + k*bs; /* xk */ 60 nz = ai[k+1] - ai[k]; 61 vj = aj + ai[k]; 62 xj = t + (*vj)*bs; 63 while (nz--) { 64 /* xk += U(k,:)*x(:) */ 65 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 66 vj++; 67 v += bs2; xj = t + (*vj)*bs; 68 } 69 idx = bs*r[k]; 70 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 71 } 72 73 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 74 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 75 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 76 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 77 ierr = PetscLogFlops(bs2*(2.0*a->nz + mbs));CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N" 83 PetscErrorCode MatForwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 84 { 85 PetscFunctionBegin; 86 SETERRQ(1,"not implemented yet"); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N" 92 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx) 93 { 94 PetscFunctionBegin; 95 SETERRQ(1,"not implemented yet"); 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 101 PetscErrorCode ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 102 { 103 PetscErrorCode ierr; 104 PetscInt nz,*vj,k; 105 PetscInt bs2 = bs*bs; 106 MatScalar *v,*diag; 107 PetscScalar *xk,*xj,*xk_tmp; 108 109 PetscFunctionBegin; 110 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 111 for (k=0; k<mbs; k++){ 112 v = aa + bs2*ai[k]; 113 xk = x + k*bs; /* Dk*xk = k-th block of x */ 114 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 115 nz = ai[k+1] - ai[k]; 116 vj = aj + ai[k]; 117 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 118 while (nz--) { 119 /* x(:) += U(k,:)^T*(Dk*xk) */ 120 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 121 vj++; xj = x + (*vj)*bs; 122 v += bs2; 123 } 124 /* xk = inv(Dk)*(Dk*xk) */ 125 diag = aa+k*bs2; /* ptr to inv(Dk) */ 126 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 127 } 128 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 134 PetscErrorCode BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 135 { 136 PetscInt nz,*vj,k; 137 PetscInt bs2 = bs*bs; 138 MatScalar *v; 139 PetscScalar *xk,*xj; 140 141 PetscFunctionBegin; 142 for (k=mbs-1; k>=0; k--){ 143 v = aa + bs2*ai[k]; 144 xk = x + k*bs; /* xk */ 145 nz = ai[k+1] - ai[k]; 146 vj = aj + ai[k]; 147 xj = x + (*vj)*bs; 148 while (nz--) { 149 /* xk += U(k,:)*x(:) */ 150 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 151 vj++; 152 v += bs2; xj = x + (*vj)*bs; 153 } 154 } 155 PetscFunctionReturn(0); 156 } 157 158 #undef __FUNCT__ 159 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering" 160 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 161 { 162 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 163 PetscErrorCode ierr; 164 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 165 PetscInt bs=A->rmap->bs; 166 MatScalar *aa=a->a; 167 PetscScalar *x,*b; 168 #if defined(PETSC_USE_LOG) 169 PetscInt bs2 = a->bs2; 170 #endif 171 172 PetscFunctionBegin; 173 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 175 176 /* solve U^T * D * y = b by forward substitution */ 177 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 178 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 179 180 /* solve U*x = y by back substitution */ 181 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 182 183 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 184 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 185 ierr = PetscLogFlops(bs2*(2.0*a->nz + mbs));CHKERRQ(ierr); 186 PetscFunctionReturn(0); 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering" 191 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 192 { 193 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 194 PetscErrorCode ierr; 195 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 196 PetscInt bs=A->rmap->bs; 197 MatScalar *aa=a->a; 198 PetscScalar *x,*b; 199 #if defined(PETSC_USE_LOG) 200 PetscInt bs2 = a->bs2; 201 #endif 202 203 PetscFunctionBegin; 204 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 205 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 206 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 207 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 208 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 209 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 210 ierr = PetscLogFlops(bs2*a->nz + A->rmap->N);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering" 216 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 217 { 218 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 219 PetscErrorCode ierr; 220 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 221 PetscInt bs=A->rmap->bs; 222 MatScalar *aa=a->a; 223 PetscScalar *x,*b; 224 #if defined(PETSC_USE_LOG) 225 PetscInt bs2 = a->bs2; 226 #endif 227 228 PetscFunctionBegin; 229 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 230 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 231 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 232 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 233 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 234 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 235 ierr = PetscLogFlops(bs2*a->nz);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatSolve_SeqSBAIJ_7" 241 PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx) 242 { 243 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 244 IS isrow=a->row; 245 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 246 PetscErrorCode ierr; 247 const PetscInt *r; 248 PetscInt nz,*vj,k,idx; 249 MatScalar *aa=a->a,*v,*d; 250 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 251 252 PetscFunctionBegin; 253 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 254 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 255 t = a->solve_work; 256 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 257 258 /* solve U^T * D * y = b by forward substitution */ 259 tp = t; 260 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 261 idx = 7*r[k]; 262 tp[0] = b[idx]; 263 tp[1] = b[idx+1]; 264 tp[2] = b[idx+2]; 265 tp[3] = b[idx+3]; 266 tp[4] = b[idx+4]; 267 tp[5] = b[idx+5]; 268 tp[6] = b[idx+6]; 269 tp += 7; 270 } 271 272 for (k=0; k<mbs; k++){ 273 v = aa + 49*ai[k]; 274 vj = aj + ai[k]; 275 tp = t + k*7; 276 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 277 nz = ai[k+1] - ai[k]; 278 tp = t + (*vj)*7; 279 while (nz--) { 280 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 281 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 282 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 283 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 284 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 285 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 286 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 287 vj++; tp = t + (*vj)*7; 288 v += 49; 289 } 290 291 /* xk = inv(Dk)*(Dk*xk) */ 292 d = aa+k*49; /* ptr to inv(Dk) */ 293 tp = t + k*7; 294 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 295 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 296 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 297 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 298 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 299 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 300 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 301 } 302 303 /* solve U*x = y by back substitution */ 304 for (k=mbs-1; k>=0; k--){ 305 v = aa + 49*ai[k]; 306 vj = aj + ai[k]; 307 tp = t + k*7; 308 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 309 nz = ai[k+1] - ai[k]; 310 311 tp = t + (*vj)*7; 312 while (nz--) { 313 /* xk += U(k,:)*x(:) */ 314 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 315 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 316 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 317 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 318 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 319 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 320 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 321 vj++; tp = t + (*vj)*7; 322 v += 49; 323 } 324 tp = t + k*7; 325 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 326 idx = 7*r[k]; 327 x[idx] = x0; 328 x[idx+1] = x1; 329 x[idx+2] = x2; 330 x[idx+3] = x3; 331 x[idx+4] = x4; 332 x[idx+5] = x5; 333 x[idx+6] = x6; 334 } 335 336 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 337 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 338 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 339 ierr = PetscLogFlops(49.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 340 PetscFunctionReturn(0); 341 } 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 345 PetscErrorCode ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 346 { 347 MatScalar *v,*d; 348 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 349 PetscInt nz,*vj,k; 350 351 PetscFunctionBegin; 352 for (k=0; k<mbs; k++){ 353 v = aa + 49*ai[k]; 354 xp = x + k*7; 355 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 356 nz = ai[k+1] - ai[k]; 357 vj = aj + ai[k]; 358 xp = x + (*vj)*7; 359 while (nz--) { 360 /* x(:) += U(k,:)^T*(Dk*xk) */ 361 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 362 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 363 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 364 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 365 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 366 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 367 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 368 vj++; xp = x + (*vj)*7; 369 v += 49; 370 } 371 /* xk = inv(Dk)*(Dk*xk) */ 372 d = aa+k*49; /* ptr to inv(Dk) */ 373 xp = x + k*7; 374 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 375 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 376 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 377 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 378 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 379 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 380 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 381 } 382 PetscFunctionReturn(0); 383 } 384 385 #undef __FUNCT__ 386 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 387 PetscErrorCode BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 388 { 389 MatScalar *v; 390 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 391 PetscInt nz,*vj,k; 392 393 PetscFunctionBegin; 394 for (k=mbs-1; k>=0; k--){ 395 v = aa + 49*ai[k]; 396 xp = x + k*7; 397 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 398 nz = ai[k+1] - ai[k]; 399 vj = aj + ai[k]; 400 xp = x + (*vj)*7; 401 while (nz--) { 402 /* xk += U(k,:)*x(:) */ 403 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 404 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 405 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 406 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 407 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 408 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 409 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 410 vj++; 411 v += 49; xp = x + (*vj)*7; 412 } 413 xp = x + k*7; 414 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 415 } 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering" 421 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 422 { 423 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 424 PetscErrorCode ierr; 425 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 426 MatScalar *aa=a->a; 427 PetscScalar *x,*b; 428 429 PetscFunctionBegin; 430 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 431 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 432 433 /* solve U^T * D * y = b by forward substitution */ 434 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 435 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 436 437 /* solve U*x = y by back substitution */ 438 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 439 440 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 441 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 442 ierr = PetscLogFlops(49.0*(2*a->nz + mbs));CHKERRQ(ierr); 443 PetscFunctionReturn(0); 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering" 448 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 449 { 450 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 451 PetscErrorCode ierr; 452 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 453 MatScalar *aa=a->a; 454 PetscScalar *x,*b; 455 456 PetscFunctionBegin; 457 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 458 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 459 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 460 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 461 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 462 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 463 ierr = PetscLogFlops(49.0*a->nz + mbs);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 #undef __FUNCT__ 468 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering" 469 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 470 { 471 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 472 PetscErrorCode ierr; 473 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 474 MatScalar *aa=a->a; 475 PetscScalar *x,*b; 476 477 PetscFunctionBegin; 478 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 479 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 480 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 481 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 482 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 483 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 484 ierr = PetscLogFlops(49.0*a->nz);CHKERRQ(ierr); 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNCT__ 489 #define __FUNCT__ "MatSolve_SeqSBAIJ_6" 490 PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx) 491 { 492 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 493 IS isrow=a->row; 494 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 495 PetscErrorCode ierr; 496 const PetscInt *r; 497 PetscInt nz,*vj,k,idx; 498 MatScalar *aa=a->a,*v,*d; 499 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 500 501 PetscFunctionBegin; 502 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 503 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 504 t = a->solve_work; 505 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 506 507 /* solve U^T * D * y = b by forward substitution */ 508 tp = t; 509 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 510 idx = 6*r[k]; 511 tp[0] = b[idx]; 512 tp[1] = b[idx+1]; 513 tp[2] = b[idx+2]; 514 tp[3] = b[idx+3]; 515 tp[4] = b[idx+4]; 516 tp[5] = b[idx+5]; 517 tp += 6; 518 } 519 520 for (k=0; k<mbs; k++){ 521 v = aa + 36*ai[k]; 522 vj = aj + ai[k]; 523 tp = t + k*6; 524 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 525 nz = ai[k+1] - ai[k]; 526 tp = t + (*vj)*6; 527 while (nz--) { 528 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 529 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 530 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 531 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 532 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 533 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 534 vj++; tp = t + (*vj)*6; 535 v += 36; 536 } 537 538 /* xk = inv(Dk)*(Dk*xk) */ 539 d = aa+k*36; /* ptr to inv(Dk) */ 540 tp = t + k*6; 541 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 542 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 543 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 544 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 545 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 546 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 547 } 548 549 /* solve U*x = y by back substitution */ 550 for (k=mbs-1; k>=0; k--){ 551 v = aa + 36*ai[k]; 552 vj = aj + ai[k]; 553 tp = t + k*6; 554 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 555 nz = ai[k+1] - ai[k]; 556 557 tp = t + (*vj)*6; 558 while (nz--) { 559 /* xk += U(k,:)*x(:) */ 560 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 561 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 562 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 563 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 564 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 565 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 566 vj++; tp = t + (*vj)*6; 567 v += 36; 568 } 569 tp = t + k*6; 570 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 571 idx = 6*r[k]; 572 x[idx] = x0; 573 x[idx+1] = x1; 574 x[idx+2] = x2; 575 x[idx+3] = x3; 576 x[idx+4] = x4; 577 x[idx+5] = x5; 578 } 579 580 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 581 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 582 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 583 ierr = PetscLogFlops(36.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 584 PetscFunctionReturn(0); 585 } 586 587 #undef __FUNCT__ 588 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 589 PetscErrorCode ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 590 { 591 MatScalar *v,*d; 592 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 593 PetscInt nz,*vj,k; 594 595 PetscFunctionBegin; 596 for (k=0; k<mbs; k++){ 597 v = aa + 36*ai[k]; 598 xp = x + k*6; 599 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 600 nz = ai[k+1] - ai[k]; 601 vj = aj + ai[k]; 602 xp = x + (*vj)*6; 603 while (nz--) { 604 /* x(:) += U(k,:)^T*(Dk*xk) */ 605 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 606 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 607 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 608 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 609 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 610 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 611 vj++; xp = x + (*vj)*6; 612 v += 36; 613 } 614 /* xk = inv(Dk)*(Dk*xk) */ 615 d = aa+k*36; /* ptr to inv(Dk) */ 616 xp = x + k*6; 617 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 618 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 619 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 620 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 621 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 622 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 623 } 624 PetscFunctionReturn(0); 625 } 626 #undef __FUNCT__ 627 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 628 PetscErrorCode BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 629 { 630 MatScalar *v; 631 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 632 PetscInt nz,*vj,k; 633 634 PetscFunctionBegin; 635 for (k=mbs-1; k>=0; k--){ 636 v = aa + 36*ai[k]; 637 xp = x + k*6; 638 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 639 nz = ai[k+1] - ai[k]; 640 vj = aj + ai[k]; 641 xp = x + (*vj)*6; 642 while (nz--) { 643 /* xk += U(k,:)*x(:) */ 644 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 645 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 646 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 647 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 648 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 649 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 650 vj++; 651 v += 36; xp = x + (*vj)*6; 652 } 653 xp = x + k*6; 654 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 655 } 656 PetscFunctionReturn(0); 657 } 658 659 660 #undef __FUNCT__ 661 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering" 662 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 663 { 664 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 665 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 666 MatScalar *aa=a->a; 667 PetscScalar *x,*b; 668 PetscErrorCode ierr; 669 670 PetscFunctionBegin; 671 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 672 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 673 674 /* solve U^T * D * y = b by forward substitution */ 675 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 676 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 677 678 /* solve U*x = y by back substitution */ 679 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 680 681 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 682 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 683 ierr = PetscLogFlops(36.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 684 PetscFunctionReturn(0); 685 } 686 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering" 689 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 690 { 691 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 692 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 693 MatScalar *aa=a->a; 694 PetscScalar *x,*b; 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 699 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 700 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 701 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 702 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 703 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 704 ierr = PetscLogFlops(36.0*a->nz + mbs);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering" 710 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 711 { 712 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 713 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 714 MatScalar *aa=a->a; 715 PetscScalar *x,*b; 716 PetscErrorCode ierr; 717 718 PetscFunctionBegin; 719 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 720 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 721 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 722 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 723 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 724 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 725 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatSolve_SeqSBAIJ_5" 731 PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx) 732 { 733 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 734 IS isrow=a->row; 735 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 736 PetscErrorCode ierr; 737 const PetscInt *r; 738 PetscInt nz,*vj,k,idx; 739 MatScalar *aa=a->a,*v,*diag; 740 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 741 742 PetscFunctionBegin; 743 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 744 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 745 t = a->solve_work; 746 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 747 748 /* solve U^T * D * y = b by forward substitution */ 749 tp = t; 750 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 751 idx = 5*r[k]; 752 tp[0] = b[idx]; 753 tp[1] = b[idx+1]; 754 tp[2] = b[idx+2]; 755 tp[3] = b[idx+3]; 756 tp[4] = b[idx+4]; 757 tp += 5; 758 } 759 760 for (k=0; k<mbs; k++){ 761 v = aa + 25*ai[k]; 762 vj = aj + ai[k]; 763 tp = t + k*5; 764 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 765 nz = ai[k+1] - ai[k]; 766 767 tp = t + (*vj)*5; 768 while (nz--) { 769 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 770 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 771 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 772 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 773 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 774 vj++; tp = t + (*vj)*5; 775 v += 25; 776 } 777 778 /* xk = inv(Dk)*(Dk*xk) */ 779 diag = aa+k*25; /* ptr to inv(Dk) */ 780 tp = t + k*5; 781 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 782 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 783 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 784 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 785 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 786 } 787 788 /* solve U*x = y by back substitution */ 789 for (k=mbs-1; k>=0; k--){ 790 v = aa + 25*ai[k]; 791 vj = aj + ai[k]; 792 tp = t + k*5; 793 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 794 nz = ai[k+1] - ai[k]; 795 796 tp = t + (*vj)*5; 797 while (nz--) { 798 /* xk += U(k,:)*x(:) */ 799 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 800 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 801 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 802 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 803 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 804 vj++; tp = t + (*vj)*5; 805 v += 25; 806 } 807 tp = t + k*5; 808 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 809 idx = 5*r[k]; 810 x[idx] = x0; 811 x[idx+1] = x1; 812 x[idx+2] = x2; 813 x[idx+3] = x3; 814 x[idx+4] = x4; 815 } 816 817 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 818 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 819 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 820 ierr = PetscLogFlops(25.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 826 PetscErrorCode ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 827 { 828 MatScalar *v,*diag; 829 PetscScalar *xp,x0,x1,x2,x3,x4; 830 PetscInt nz,*vj,k; 831 832 PetscFunctionBegin; 833 for (k=0; k<mbs; k++){ 834 v = aa + 25*ai[k]; 835 xp = x + k*5; 836 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 837 nz = ai[k+1] - ai[k]; 838 vj = aj + ai[k]; 839 xp = x + (*vj)*5; 840 while (nz--) { 841 /* x(:) += U(k,:)^T*(Dk*xk) */ 842 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 843 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 844 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 845 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 846 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 847 vj++; xp = x + (*vj)*5; 848 v += 25; 849 } 850 /* xk = inv(Dk)*(Dk*xk) */ 851 diag = aa+k*25; /* ptr to inv(Dk) */ 852 xp = x + k*5; 853 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 854 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 855 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 856 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 857 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 858 } 859 PetscFunctionReturn(0); 860 } 861 862 #undef __FUNCT__ 863 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 864 PetscErrorCode BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 865 { 866 MatScalar *v; 867 PetscScalar *xp,x0,x1,x2,x3,x4; 868 PetscInt nz,*vj,k; 869 870 PetscFunctionBegin; 871 for (k=mbs-1; k>=0; k--){ 872 v = aa + 25*ai[k]; 873 xp = x + k*5; 874 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 875 nz = ai[k+1] - ai[k]; 876 vj = aj + ai[k]; 877 xp = x + (*vj)*5; 878 while (nz--) { 879 /* xk += U(k,:)*x(:) */ 880 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 881 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 882 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 883 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 884 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 885 vj++; 886 v += 25; xp = x + (*vj)*5; 887 } 888 xp = x + k*5; 889 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 890 } 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering" 896 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 897 { 898 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 899 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 900 MatScalar *aa=a->a; 901 PetscScalar *x,*b; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 906 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 907 908 /* solve U^T * D * y = b by forward substitution */ 909 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 910 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 911 912 /* solve U*x = y by back substitution */ 913 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 914 915 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 916 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 917 ierr = PetscLogFlops(25.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 918 PetscFunctionReturn(0); 919 } 920 921 #undef __FUNCT__ 922 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering" 923 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 924 { 925 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 926 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 927 MatScalar *aa=a->a; 928 PetscScalar *x,*b; 929 PetscErrorCode ierr; 930 931 PetscFunctionBegin; 932 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 933 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 934 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 935 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 936 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 937 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 938 ierr = PetscLogFlops(25.0*(a->nz + mbs));CHKERRQ(ierr); 939 PetscFunctionReturn(0); 940 } 941 942 #undef __FUNCT__ 943 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering" 944 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 945 { 946 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 947 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 948 MatScalar *aa=a->a; 949 PetscScalar *x,*b; 950 PetscErrorCode ierr; 951 952 PetscFunctionBegin; 953 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 954 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 955 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 956 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 957 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 958 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 959 ierr = PetscLogFlops(25.0*a->nz);CHKERRQ(ierr); 960 PetscFunctionReturn(0); 961 } 962 963 #undef __FUNCT__ 964 #define __FUNCT__ "MatSolve_SeqSBAIJ_4" 965 PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx) 966 { 967 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 968 IS isrow=a->row; 969 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 970 PetscErrorCode ierr; 971 const PetscInt *r; 972 PetscInt nz,*vj,k,idx; 973 MatScalar *aa=a->a,*v,*diag; 974 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 975 976 PetscFunctionBegin; 977 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 978 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 979 t = a->solve_work; 980 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 981 982 /* solve U^T * D * y = b by forward substitution */ 983 tp = t; 984 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 985 idx = 4*r[k]; 986 tp[0] = b[idx]; 987 tp[1] = b[idx+1]; 988 tp[2] = b[idx+2]; 989 tp[3] = b[idx+3]; 990 tp += 4; 991 } 992 993 for (k=0; k<mbs; k++){ 994 v = aa + 16*ai[k]; 995 vj = aj + ai[k]; 996 tp = t + k*4; 997 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 998 nz = ai[k+1] - ai[k]; 999 1000 tp = t + (*vj)*4; 1001 while (nz--) { 1002 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 1003 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1004 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1005 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1006 vj++; tp = t + (*vj)*4; 1007 v += 16; 1008 } 1009 1010 /* xk = inv(Dk)*(Dk*xk) */ 1011 diag = aa+k*16; /* ptr to inv(Dk) */ 1012 tp = t + k*4; 1013 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1014 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1015 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1016 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1017 } 1018 1019 /* solve U*x = y by back substitution */ 1020 for (k=mbs-1; k>=0; k--){ 1021 v = aa + 16*ai[k]; 1022 vj = aj + ai[k]; 1023 tp = t + k*4; 1024 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 1025 nz = ai[k+1] - ai[k]; 1026 1027 tp = t + (*vj)*4; 1028 while (nz--) { 1029 /* xk += U(k,:)*x(:) */ 1030 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 1031 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 1032 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 1033 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 1034 vj++; tp = t + (*vj)*4; 1035 v += 16; 1036 } 1037 tp = t + k*4; 1038 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 1039 idx = 4*r[k]; 1040 x[idx] = x0; 1041 x[idx+1] = x1; 1042 x[idx+2] = x2; 1043 x[idx+3] = x3; 1044 } 1045 1046 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1047 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1048 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1049 ierr = PetscLogFlops(16.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1050 PetscFunctionReturn(0); 1051 } 1052 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1055 PetscErrorCode ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1056 { 1057 MatScalar *v,*diag; 1058 PetscScalar *xp,x0,x1,x2,x3; 1059 PetscInt nz,*vj,k; 1060 1061 PetscFunctionBegin; 1062 for (k=0; k<mbs; k++){ 1063 v = aa + 16*ai[k]; 1064 xp = x + k*4; 1065 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 1066 nz = ai[k+1] - ai[k]; 1067 vj = aj + ai[k]; 1068 xp = x + (*vj)*4; 1069 while (nz--) { 1070 /* x(:) += U(k,:)^T*(Dk*xk) */ 1071 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 1072 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1073 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1074 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1075 vj++; xp = x + (*vj)*4; 1076 v += 16; 1077 } 1078 /* xk = inv(Dk)*(Dk*xk) */ 1079 diag = aa+k*16; /* ptr to inv(Dk) */ 1080 xp = x + k*4; 1081 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1082 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1083 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1084 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1085 } 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1091 PetscErrorCode BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1092 { 1093 MatScalar *v; 1094 PetscScalar *xp,x0,x1,x2,x3; 1095 PetscInt nz,*vj,k; 1096 1097 PetscFunctionBegin; 1098 for (k=mbs-1; k>=0; k--){ 1099 v = aa + 16*ai[k]; 1100 xp = x + k*4; 1101 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 1102 nz = ai[k+1] - ai[k]; 1103 vj = aj + ai[k]; 1104 xp = x + (*vj)*4; 1105 while (nz--) { 1106 /* xk += U(k,:)*x(:) */ 1107 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 1108 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 1109 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 1110 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 1111 vj++; 1112 v += 16; xp = x + (*vj)*4; 1113 } 1114 xp = x + k*4; 1115 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 1116 } 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering" 1122 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1123 { 1124 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1125 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1126 MatScalar *aa=a->a; 1127 PetscScalar *x,*b; 1128 PetscErrorCode ierr; 1129 1130 PetscFunctionBegin; 1131 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1132 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1133 1134 /* solve U^T * D * y = b by forward substitution */ 1135 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1136 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1137 1138 /* solve U*x = y by back substitution */ 1139 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1140 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1141 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1142 ierr = PetscLogFlops(16.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering" 1148 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1149 { 1150 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1151 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1152 MatScalar *aa=a->a; 1153 PetscScalar *x,*b; 1154 PetscErrorCode ierr; 1155 1156 PetscFunctionBegin; 1157 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1158 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1159 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1160 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1161 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1162 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1163 ierr = PetscLogFlops(16.0*a->nz + mbs);CHKERRQ(ierr); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering" 1169 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 1170 { 1171 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1172 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1173 MatScalar *aa=a->a; 1174 PetscScalar *x,*b; 1175 PetscErrorCode ierr; 1176 1177 PetscFunctionBegin; 1178 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1179 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1180 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1181 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1182 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1183 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1184 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatSolve_SeqSBAIJ_3" 1190 PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx) 1191 { 1192 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1193 IS isrow=a->row; 1194 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1195 PetscErrorCode ierr; 1196 const PetscInt *r; 1197 PetscInt nz,*vj,k,idx; 1198 MatScalar *aa=a->a,*v,*diag; 1199 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 1200 1201 PetscFunctionBegin; 1202 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1203 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1204 t = a->solve_work; 1205 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1206 1207 /* solve U^T * D * y = b by forward substitution */ 1208 tp = t; 1209 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1210 idx = 3*r[k]; 1211 tp[0] = b[idx]; 1212 tp[1] = b[idx+1]; 1213 tp[2] = b[idx+2]; 1214 tp += 3; 1215 } 1216 1217 for (k=0; k<mbs; k++){ 1218 v = aa + 9*ai[k]; 1219 vj = aj + ai[k]; 1220 tp = t + k*3; 1221 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 1222 nz = ai[k+1] - ai[k]; 1223 1224 tp = t + (*vj)*3; 1225 while (nz--) { 1226 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1227 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1228 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1229 vj++; tp = t + (*vj)*3; 1230 v += 9; 1231 } 1232 1233 /* xk = inv(Dk)*(Dk*xk) */ 1234 diag = aa+k*9; /* ptr to inv(Dk) */ 1235 tp = t + k*3; 1236 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1237 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1238 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1239 } 1240 1241 /* solve U*x = y by back substitution */ 1242 for (k=mbs-1; k>=0; k--){ 1243 v = aa + 9*ai[k]; 1244 vj = aj + ai[k]; 1245 tp = t + k*3; 1246 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 1247 nz = ai[k+1] - ai[k]; 1248 1249 tp = t + (*vj)*3; 1250 while (nz--) { 1251 /* xk += U(k,:)*x(:) */ 1252 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 1253 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 1254 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 1255 vj++; tp = t + (*vj)*3; 1256 v += 9; 1257 } 1258 tp = t + k*3; 1259 tp[0] = x0; tp[1] = x1; tp[2] = x2; 1260 idx = 3*r[k]; 1261 x[idx] = x0; 1262 x[idx+1] = x1; 1263 x[idx+2] = x2; 1264 } 1265 1266 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1267 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1268 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1269 ierr = PetscLogFlops(9.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1270 PetscFunctionReturn(0); 1271 } 1272 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1275 PetscErrorCode ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1276 { 1277 MatScalar *v,*diag; 1278 PetscScalar *xp,x0,x1,x2; 1279 PetscInt nz,*vj,k; 1280 1281 PetscFunctionBegin; 1282 for (k=0; k<mbs; k++){ 1283 v = aa + 9*ai[k]; 1284 xp = x + k*3; 1285 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 1286 nz = ai[k+1] - ai[k]; 1287 vj = aj + ai[k]; 1288 xp = x + (*vj)*3; 1289 while (nz--) { 1290 /* x(:) += U(k,:)^T*(Dk*xk) */ 1291 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1292 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1293 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1294 vj++; xp = x + (*vj)*3; 1295 v += 9; 1296 } 1297 /* xk = inv(Dk)*(Dk*xk) */ 1298 diag = aa+k*9; /* ptr to inv(Dk) */ 1299 xp = x + k*3; 1300 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1301 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1302 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1303 } 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1309 PetscErrorCode BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1310 { 1311 MatScalar *v; 1312 PetscScalar *xp,x0,x1,x2; 1313 PetscInt nz,*vj,k; 1314 1315 PetscFunctionBegin; 1316 for (k=mbs-1; k>=0; k--){ 1317 v = aa + 9*ai[k]; 1318 xp = x + k*3; 1319 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 1320 nz = ai[k+1] - ai[k]; 1321 vj = aj + ai[k]; 1322 xp = x + (*vj)*3; 1323 while (nz--) { 1324 /* xk += U(k,:)*x(:) */ 1325 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 1326 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 1327 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 1328 vj++; 1329 v += 9; xp = x + (*vj)*3; 1330 } 1331 xp = x + k*3; 1332 xp[0] = x0; xp[1] = x1; xp[2] = x2; 1333 } 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering" 1339 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1340 { 1341 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1342 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1343 MatScalar *aa=a->a; 1344 PetscScalar *x,*b; 1345 PetscErrorCode ierr; 1346 1347 PetscFunctionBegin; 1348 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1349 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1350 1351 /* solve U^T * D * y = b by forward substitution */ 1352 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1353 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1354 1355 /* solve U*x = y by back substitution */ 1356 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1357 1358 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1359 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1360 ierr = PetscLogFlops(9.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering" 1366 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1367 { 1368 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1369 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1370 MatScalar *aa=a->a; 1371 PetscScalar *x,*b; 1372 PetscErrorCode ierr; 1373 1374 PetscFunctionBegin; 1375 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1376 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1377 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1378 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1379 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1380 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1381 ierr = PetscLogFlops(9.0*(a->nz + mbs));CHKERRQ(ierr); 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering" 1387 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 1388 { 1389 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1390 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1391 MatScalar *aa=a->a; 1392 PetscScalar *x,*b; 1393 PetscErrorCode ierr; 1394 1395 PetscFunctionBegin; 1396 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1397 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1398 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1399 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1400 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1401 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1402 ierr = PetscLogFlops(9.0*a->nz);CHKERRQ(ierr); 1403 PetscFunctionReturn(0); 1404 } 1405 1406 #undef __FUNCT__ 1407 #define __FUNCT__ "MatSolve_SeqSBAIJ_2" 1408 PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx) 1409 { 1410 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 1411 IS isrow=a->row; 1412 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1413 PetscErrorCode ierr; 1414 const PetscInt *r; 1415 PetscInt nz,*vj,k,k2,idx; 1416 MatScalar *aa=a->a,*v,*diag; 1417 PetscScalar *x,*b,x0,x1,*t; 1418 1419 PetscFunctionBegin; 1420 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1421 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1422 t = a->solve_work; 1423 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1424 1425 /* solve U^T * D * y = perm(b) by forward substitution */ 1426 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1427 idx = 2*r[k]; 1428 t[k*2] = b[idx]; 1429 t[k*2+1] = b[idx+1]; 1430 } 1431 for (k=0; k<mbs; k++){ 1432 v = aa + 4*ai[k]; 1433 vj = aj + ai[k]; 1434 k2 = k*2; 1435 x0 = t[k2]; x1 = t[k2+1]; 1436 nz = ai[k+1] - ai[k]; 1437 while (nz--) { 1438 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1439 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1440 vj++; v += 4; 1441 } 1442 diag = aa+k*4; /* ptr to inv(Dk) */ 1443 t[k2] = diag[0]*x0 + diag[2]*x1; 1444 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1445 } 1446 1447 /* solve U*x = y by back substitution */ 1448 for (k=mbs-1; k>=0; k--){ 1449 v = aa + 4*ai[k]; 1450 vj = aj + ai[k]; 1451 k2 = k*2; 1452 x0 = t[k2]; x1 = t[k2+1]; 1453 nz = ai[k+1] - ai[k]; 1454 while (nz--) { 1455 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1456 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1457 vj++; v += 4; 1458 } 1459 t[k2] = x0; 1460 t[k2+1] = x1; 1461 idx = 2*r[k]; 1462 x[idx] = x0; 1463 x[idx+1] = x1; 1464 } 1465 1466 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1467 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1468 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1469 ierr = PetscLogFlops(4.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1475 PetscErrorCode ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1476 { 1477 MatScalar *v,*diag; 1478 PetscScalar x0,x1; 1479 PetscInt nz,*vj,k,k2; 1480 1481 PetscFunctionBegin; 1482 for (k=0; k<mbs; k++){ 1483 v = aa + 4*ai[k]; 1484 vj = aj + ai[k]; 1485 k2 = k*2; 1486 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1487 nz = ai[k+1] - ai[k]; 1488 1489 while (nz--) { 1490 /* x(:) += U(k,:)^T*(Dk*xk) */ 1491 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1492 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1493 vj++; v += 4; 1494 } 1495 /* xk = inv(Dk)*(Dk*xk) */ 1496 diag = aa+k*4; /* ptr to inv(Dk) */ 1497 x[k2] = diag[0]*x0 + diag[2]*x1; 1498 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1499 } 1500 PetscFunctionReturn(0); 1501 } 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1505 PetscErrorCode BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1506 { 1507 MatScalar *v; 1508 PetscScalar x0,x1; 1509 PetscInt nz,*vj,k,k2; 1510 1511 PetscFunctionBegin; 1512 for (k=mbs-1; k>=0; k--){ 1513 v = aa + 4*ai[k]; 1514 vj = aj + ai[k]; 1515 k2 = k*2; 1516 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1517 nz = ai[k+1] - ai[k]; 1518 while (nz--) { 1519 /* xk += U(k,:)*x(:) */ 1520 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1521 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1522 vj++; v += 4; 1523 } 1524 x[k2] = x0; 1525 x[k2+1] = x1; 1526 } 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering" 1532 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1533 { 1534 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1535 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1536 MatScalar *aa=a->a; 1537 PetscScalar *x,*b; 1538 PetscErrorCode ierr; 1539 1540 PetscFunctionBegin; 1541 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1542 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1543 1544 /* solve U^T * D * y = b by forward substitution */ 1545 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1546 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1547 1548 /* solve U*x = y by back substitution */ 1549 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1550 1551 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1552 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1553 ierr = PetscLogFlops(4.0*(2.0*a->nz + mbs));CHKERRQ(ierr); /* bs2*(2*a->nz + mbs) */ 1554 PetscFunctionReturn(0); 1555 } 1556 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering" 1559 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1560 { 1561 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1562 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1563 MatScalar *aa=a->a; 1564 PetscScalar *x,*b; 1565 PetscErrorCode ierr; 1566 1567 PetscFunctionBegin; 1568 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1569 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1570 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1571 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1572 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1573 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1574 ierr = PetscLogFlops(4.0*(a->nz + mbs));CHKERRQ(ierr); 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering" 1580 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 1581 { 1582 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1583 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1584 MatScalar *aa=a->a; 1585 PetscScalar *x,*b; 1586 PetscErrorCode ierr; 1587 1588 PetscFunctionBegin; 1589 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1590 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1591 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1592 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1593 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1594 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1595 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 1596 PetscFunctionReturn(0); 1597 } 1598 1599 #undef __FUNCT__ 1600 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_newdatastruct" 1601 PetscErrorCode MatSolve_SeqSBAIJ_1_newdatastruct(Mat A,Vec bb,Vec xx) 1602 { 1603 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1604 IS isrow=a->row; 1605 PetscErrorCode ierr; 1606 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1607 const MatScalar *aa=a->a,*v; 1608 const PetscScalar *b; 1609 PetscScalar *x,xk,*t; 1610 PetscInt nz,k,j; 1611 1612 PetscFunctionBegin; 1613 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1614 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1615 t = a->solve_work; 1616 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1617 1618 /* solve U^T*D^(1/2)*y = perm(b) by forward substitution */ 1619 for (k=0; k<mbs; k++) t[k] = b[rp[k]]; 1620 for (k=0; k<mbs; k++){ 1621 v = aa + ai[k]; 1622 vj = aj + ai[k]; 1623 xk = t[k]; 1624 nz = ai[k+1] - ai[k] - 1; 1625 for (j=0; j<nz; j++) t[vj[j]] += v[j]*xk; 1626 t[k] = xk*v[nz]; /* v[nz] = 1/D(k) */ 1627 } 1628 1629 /* solve U*perm(x) = y by back substitution */ 1630 for (k=mbs-1; k>=0; k--){ 1631 v = aa + ai[k]; 1632 vj = aj + ai[k]; 1633 nz = ai[k+1] - ai[k] - 1; 1634 /* for (j=0; j<nz; j++) t[k] += v[j]*t[vj[j]]; */ 1635 for (j=nz-1; j>=0; j--) t[k] += v[j]*t[vj[j]]; 1636 x[rp[k]] = t[k]; 1637 } 1638 1639 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1640 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1641 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1642 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 1643 PetscFunctionReturn(0); 1644 } 1645 1646 #undef __FUNCT__ 1647 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1648 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1649 { 1650 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1651 IS isrow=a->row; 1652 PetscErrorCode ierr; 1653 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1654 const MatScalar *aa=a->a,*v; 1655 PetscScalar *x,*b,xk,*t; 1656 PetscInt nz,k; 1657 1658 PetscFunctionBegin; 1659 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1660 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1661 t = a->solve_work; 1662 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1663 1664 /* solve U^T*D^(1/2)*y = perm(b) by forward substitution */ 1665 for (k=0; k<mbs; k++) t[k] = b[rp[k]]; 1666 for (k=0; k<mbs; k++){ 1667 v = aa + ai[k] + 1; 1668 vj = aj + ai[k] + 1; 1669 xk = t[k]; 1670 nz = ai[k+1] - ai[k] - 1; 1671 while (nz--) t[*vj++] += (*v++) * xk; 1672 t[k] = xk*aa[ai[k]]; /* aa[k] = 1/D(k) */ 1673 } 1674 1675 /* solve U*perm(x) = y by back substitution */ 1676 for (k=mbs-1; k>=0; k--){ 1677 v = aa + ai[k] + 1; 1678 vj = aj + ai[k] + 1; 1679 nz = ai[k+1] - ai[k] - 1; 1680 while (nz--) t[k] += (*v++) * t[*vj++]; 1681 x[rp[k]] = t[k]; 1682 } 1683 1684 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1685 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1686 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1687 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1" 1693 PetscErrorCode MatForwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1694 { 1695 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1696 IS isrow=a->row; 1697 PetscErrorCode ierr; 1698 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1699 const MatScalar *aa=a->a,*v; 1700 PetscReal diagk; 1701 PetscScalar *x,*b,xk; 1702 PetscInt nz,k; 1703 1704 PetscFunctionBegin; 1705 /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */ 1706 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1707 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1708 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1709 1710 for (k=0; k<mbs; k++) x[k] = b[rp[k]]; 1711 for (k=0; k<mbs; k++){ 1712 v = aa + ai[k] + 1; 1713 vj = aj + ai[k] + 1; 1714 xk = x[k]; 1715 nz = ai[k+1] - ai[k] - 1; 1716 while (nz--) x[*vj++] += (*v++) * xk; 1717 1718 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1719 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1720 x[k] = xk*sqrt(diagk); 1721 } 1722 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1723 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1724 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1725 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1726 PetscFunctionReturn(0); 1727 } 1728 1729 #undef __FUNCT__ 1730 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1" 1731 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1732 { 1733 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1734 IS isrow=a->row; 1735 PetscErrorCode ierr; 1736 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1737 const MatScalar *aa=a->a,*v; 1738 PetscReal diagk; 1739 PetscScalar *x,*b,*t; 1740 PetscInt nz,k; 1741 1742 PetscFunctionBegin; 1743 /* solve D^(1/2)*U*perm(x) = b by back substitution */ 1744 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1745 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1746 t = a->solve_work; 1747 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1748 1749 for (k=mbs-1; k>=0; k--){ 1750 v = aa + ai[k] + 1; 1751 vj = aj + ai[k] + 1; 1752 diagk = PetscRealPart(aa[ai[k]]); 1753 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1754 t[k] = b[k] * sqrt(diagk); 1755 nz = ai[k+1] - ai[k] - 1; 1756 while (nz--) t[k] += (*v++) * t[*vj++]; 1757 x[rp[k]] = t[k]; 1758 } 1759 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1760 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1761 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1762 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1763 PetscFunctionReturn(0); 1764 } 1765 1766 #undef __FUNCT__ 1767 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1768 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1769 { 1770 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1771 PetscErrorCode ierr; 1772 1773 PetscFunctionBegin; 1774 if (A->rmap->bs == 1) { 1775 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1776 } else { 1777 IS isrow=a->row; 1778 const PetscInt *vj,mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; 1779 const MatScalar *aa=a->a,*v; 1780 PetscScalar *x,*b,*t; 1781 PetscInt nz,k,n,i; 1782 if (bb->n > a->solves_work_n) { 1783 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 1784 ierr = PetscMalloc(bb->n*A->rmap->N*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1785 a->solves_work_n = bb->n; 1786 } 1787 n = bb->n; 1788 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1789 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1790 t = a->solves_work; 1791 1792 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1793 1794 /* solve U^T*D*y = perm(b) by forward substitution */ 1795 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs];} /* values are stored interlaced in t */ 1796 for (k=0; k<mbs; k++){ 1797 v = aa + ai[k]; 1798 vj = aj + ai[k]; 1799 nz = ai[k+1] - ai[k]; 1800 while (nz--) { 1801 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1802 v++;vj++; 1803 } 1804 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1805 } 1806 1807 /* solve U*perm(x) = y by back substitution */ 1808 for (k=mbs-1; k>=0; k--){ 1809 v = aa + ai[k]; 1810 vj = aj + ai[k]; 1811 nz = ai[k+1] - ai[k]; 1812 while (nz--) { 1813 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1814 v++;vj++; 1815 } 1816 for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i]; 1817 } 1818 1819 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1820 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1821 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1822 ierr = PetscLogFlops(bb->n*(4.0*a->nz + A->rmap->N));CHKERRQ(ierr); 1823 } 1824 PetscFunctionReturn(0); 1825 } 1826 1827 /* 1828 Special case where the matrix was ILU(0) factored in the natural 1829 ordering. This eliminates the need for the column and row permutation. 1830 */ 1831 #undef __FUNCT__ 1832 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering_newdatastruct" 1833 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering_newdatastruct(Mat A,Vec bb,Vec xx) 1834 { 1835 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1836 PetscErrorCode ierr; 1837 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*vj; 1838 const MatScalar *aa=a->a,*v; 1839 const PetscScalar *b; 1840 PetscScalar *x,xi; 1841 PetscInt nz,i,k,j; 1842 1843 PetscFunctionBegin; 1844 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1845 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1846 1847 /* solve U^T*D*y = b by forward substitution */ 1848 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1849 for (i=0; i<mbs; i++){ 1850 v = aa + ai[i]; 1851 vj = aj + ai[i]; 1852 xi = x[i]; 1853 nz = ai[i+1] - ai[i] - 1; /* exclude diag[i] */ 1854 for (j=0; j<nz; j++){ 1855 x[vj[j]] += v[j]* xi; 1856 } 1857 x[i] = xi*v[nz]; /* v[nz] = aa[diag[i]] = 1/D(i) */ 1858 } 1859 1860 /* solve U*x = y by back substitution */ 1861 k = ai[mbs-1]-2; /* last entry of U(mbs-2,:), excluding diag[mbs-2] */ 1862 for (i=mbs-2; i>=0; i--){ 1863 xi = x[i]; 1864 nz = ai[i+1] - ai[i] - 1; 1865 for (j=0; j<nz; j++){ 1866 xi += aa[k]* x[aj[k]]; k--; 1867 } 1868 x[i] = xi; 1869 k--; /* skip diagonal[i] */ 1870 } 1871 1872 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1873 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1874 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 1875 PetscFunctionReturn(0); 1876 } 1877 1878 #undef __FUNCT__ 1879 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1880 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1881 { 1882 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1883 PetscErrorCode ierr; 1884 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1885 MatScalar *aa=a->a,*v; 1886 PetscScalar *x,*b,xk; 1887 PetscInt nz,*vj,k; 1888 1889 PetscFunctionBegin; 1890 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1891 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1892 1893 /* solve U^T*D*y = b by forward substitution */ 1894 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1895 for (k=0; k<mbs; k++){ 1896 v = aa + ai[k] + 1; 1897 vj = aj + ai[k] + 1; 1898 xk = x[k]; 1899 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1900 while (nz--) x[*vj++] += (*v++) * xk; 1901 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1902 } 1903 1904 /* solve U*x = y by back substitution */ 1905 for (k=mbs-2; k>=0; k--){ 1906 v = aa + ai[k] + 1; 1907 vj = aj + ai[k] + 1; 1908 xk = x[k]; 1909 nz = ai[k+1] - ai[k] - 1; 1910 while (nz--) { 1911 xk += (*v++) * x[*vj++]; 1912 } 1913 x[k] = xk; 1914 } 1915 1916 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1917 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1918 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering" 1924 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1925 { 1926 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1927 PetscErrorCode ierr; 1928 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1929 MatScalar *aa=a->a,*v; 1930 PetscReal diagk; 1931 PetscScalar *x,*b; 1932 PetscInt nz,*vj,k; 1933 1934 PetscFunctionBegin; 1935 /* solve U^T*D^(1/2)*x = b by forward substitution */ 1936 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1937 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1938 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1939 for (k=0; k<mbs; k++){ 1940 v = aa + ai[k] + 1; 1941 vj = aj + ai[k] + 1; 1942 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1943 while (nz--) x[*vj++] += (*v++) * x[k]; 1944 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1945 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ2(PETSC_ERR_SUP,"Diagonal (%g,%g) must be real and nonnegative",PetscRealPart(aa[ai[k]]),PetscImaginaryPart(aa[ai[k]])); 1946 x[k] *= sqrt(diagk); 1947 } 1948 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1949 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1950 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1951 PetscFunctionReturn(0); 1952 } 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering" 1956 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1957 { 1958 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1959 PetscErrorCode ierr; 1960 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1961 MatScalar *aa=a->a,*v; 1962 PetscReal diagk; 1963 PetscScalar *x,*b; 1964 PetscInt nz,*vj,k; 1965 1966 PetscFunctionBegin; 1967 /* solve D^(1/2)*U*x = b by back substitution */ 1968 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1969 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1970 1971 for (k=mbs-1; k>=0; k--){ 1972 v = aa + ai[k] + 1; 1973 vj = aj + ai[k] + 1; 1974 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1975 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1976 x[k] = sqrt(diagk)*b[k]; 1977 nz = ai[k+1] - ai[k] - 1; 1978 while (nz--) x[k] += (*v++) * x[*vj++]; 1979 } 1980 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1981 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1982 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1983 PetscFunctionReturn(0); 1984 } 1985 1986 extern PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat,PetscTruth); 1987 1988 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 1989 #undef __FUNCT__ 1990 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR" 1991 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat B,Mat A,IS perm,const MatFactorInfo *info) 1992 { 1993 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 1994 PetscErrorCode ierr; 1995 const PetscInt *rip,mbs = a->mbs,*ai = a->i,*aj = a->j; 1996 PetscInt *jutmp,bs = A->rmap->bs,bs2=a->bs2,i; 1997 PetscInt m,reallocs = 0,*levtmp; 1998 PetscInt *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 1999 PetscInt incrlev,*lev,shift,prow,nz; 2000 PetscReal f = info->fill,levels = info->levels; 2001 PetscTruth perm_identity; 2002 2003 PetscFunctionBegin; 2004 /* check whether perm is the identity mapping */ 2005 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2006 2007 if (perm_identity){ 2008 a->permute = PETSC_FALSE; 2009 ai = a->i; aj = a->j; 2010 } else { /* non-trivial permutation */ 2011 a->permute = PETSC_TRUE; 2012 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 2013 ai = a->inew; aj = a->jnew; 2014 } 2015 2016 /* initialization */ 2017 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2018 umax = (PetscInt)(f*ai[mbs] + 1); 2019 ierr = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr); 2020 umax += mbs + 1; 2021 shift = mbs + 1; 2022 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 2023 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 2024 iu[0] = mbs + 1; 2025 juidx = mbs + 1; 2026 /* prowl: linked list for pivot row */ 2027 ierr = PetscMalloc3(mbs,PetscInt,&prowl,mbs,PetscInt,&q,mbs,PetscInt,&levtmp);CHKERRQ(ierr); 2028 /* q: linked list for col index */ 2029 2030 for (i=0; i<mbs; i++){ 2031 prowl[i] = mbs; 2032 q[i] = 0; 2033 } 2034 2035 /* for each row k */ 2036 for (k=0; k<mbs; k++){ 2037 nzk = 0; 2038 q[k] = mbs; 2039 /* copy current row into linked list */ 2040 nz = ai[rip[k]+1] - ai[rip[k]]; 2041 j = ai[rip[k]]; 2042 while (nz--){ 2043 vj = rip[aj[j++]]; 2044 if (vj > k){ 2045 qm = k; 2046 do { 2047 m = qm; qm = q[m]; 2048 } while(qm < vj); 2049 if (qm == vj) { 2050 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 2051 } 2052 nzk++; 2053 q[m] = vj; 2054 q[vj] = qm; 2055 levtmp[vj] = 0; /* initialize lev for nonzero element */ 2056 } 2057 } 2058 2059 /* modify nonzero structure of k-th row by computing fill-in 2060 for each row prow to be merged in */ 2061 prow = k; 2062 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 2063 2064 while (prow < k){ 2065 /* merge row prow into k-th row */ 2066 jmin = iu[prow] + 1; 2067 jmax = iu[prow+1]; 2068 qm = k; 2069 for (j=jmin; j<jmax; j++){ 2070 incrlev = lev[j-shift] + 1; 2071 if (incrlev > levels) continue; 2072 vj = ju[j]; 2073 do { 2074 m = qm; qm = q[m]; 2075 } while (qm < vj); 2076 if (qm != vj){ /* a fill */ 2077 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 2078 levtmp[vj] = incrlev; 2079 } else { 2080 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 2081 } 2082 } 2083 prow = prowl[prow]; /* next pivot row */ 2084 } 2085 2086 /* add k to row list for first nonzero element in k-th row */ 2087 if (nzk > 1){ 2088 i = q[k]; /* col value of first nonzero element in k_th row of U */ 2089 prowl[k] = prowl[i]; prowl[i] = k; 2090 } 2091 iu[k+1] = iu[k] + nzk; 2092 2093 /* allocate more space to ju and lev if needed */ 2094 if (iu[k+1] > umax) { 2095 /* estimate how much additional space we will need */ 2096 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 2097 /* just double the memory each time */ 2098 maxadd = umax; 2099 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 2100 umax += maxadd; 2101 2102 /* allocate a longer ju */ 2103 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 2104 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 2105 ierr = PetscFree(ju);CHKERRQ(ierr); 2106 ju = jutmp; 2107 2108 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 2109 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr); 2110 ierr = PetscFree(lev);CHKERRQ(ierr); 2111 lev = jutmp; 2112 reallocs += 2; /* count how many times we realloc */ 2113 } 2114 2115 /* save nonzero structure of k-th row in ju */ 2116 i=k; 2117 while (nzk --) { 2118 i = q[i]; 2119 ju[juidx] = i; 2120 lev[juidx-shift] = levtmp[i]; 2121 juidx++; 2122 } 2123 } 2124 2125 #if defined(PETSC_USE_INFO) 2126 if (ai[mbs] != 0) { 2127 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2128 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2129 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2130 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 2131 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2132 } else { 2133 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2134 } 2135 #endif 2136 2137 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2138 ierr = PetscFree3(prowl,q,levtmp);CHKERRQ(ierr); 2139 ierr = PetscFree(lev);CHKERRQ(ierr); 2140 2141 /* put together the new matrix */ 2142 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,PETSC_NULL);CHKERRQ(ierr); 2143 2144 /* ierr = PetscLogObjectParent(B,iperm);CHKERRQ(ierr); */ 2145 b = (Mat_SeqSBAIJ*)(B)->data; 2146 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2147 b->singlemalloc = PETSC_FALSE; 2148 b->free_a = PETSC_TRUE; 2149 b->free_ij = PETSC_TRUE; 2150 /* the next line frees the default space generated by the Create() */ 2151 ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr); 2152 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 2153 b->j = ju; 2154 b->i = iu; 2155 b->diag = 0; 2156 b->ilen = 0; 2157 b->imax = 0; 2158 2159 if (b->row) { 2160 ierr = ISDestroy(b->row);CHKERRQ(ierr); 2161 } 2162 if (b->icol) { 2163 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 2164 } 2165 b->row = perm; 2166 b->icol = perm; 2167 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2168 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2169 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2170 /* In b structure: Free imax, ilen, old a, old j. 2171 Allocate idnew, solve_work, new a, new j */ 2172 ierr = PetscLogObjectMemory(B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2173 b->maxnz = b->nz = iu[mbs]; 2174 2175 (B)->info.factor_mallocs = reallocs; 2176 (B)->info.fill_ratio_given = f; 2177 if (ai[mbs] != 0) { 2178 (B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2179 } else { 2180 (B)->info.fill_ratio_needed = 0.0; 2181 } 2182 ierr = MatSeqSBAIJSetNumericFactorization(B,perm_identity);CHKERRQ(ierr); 2183 PetscFunctionReturn(0); 2184 } 2185 2186 #include "petscbt.h" 2187 #include "../src/mat/utils/freespace.h" 2188 #undef __FUNCT__ 2189 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 2190 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2191 { 2192 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2193 Mat_SeqSBAIJ *b; 2194 PetscErrorCode ierr; 2195 PetscTruth perm_identity,free_ij = PETSC_TRUE,missing; 2196 PetscInt bs=A->rmap->bs,am=a->mbs,d; 2197 const PetscInt *cols,*rip,*ai,*aj; 2198 PetscInt reallocs=0,i,*ui; 2199 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2200 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 2201 PetscReal fill=info->fill,levels=info->levels,ratio_needed; 2202 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2203 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2204 PetscBT lnkbt; 2205 2206 PetscFunctionBegin; 2207 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2208 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2209 2210 /* 2211 This code originally uses Modified Sparse Row (MSR) storage 2212 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice! 2213 Then it is rewritten so the factor B takes seqsbaij format. However the associated 2214 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1, 2215 thus the original code in MSR format is still used for these cases. 2216 The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever 2217 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 2218 */ 2219 if (bs > 1){ 2220 ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info);CHKERRQ(ierr); 2221 PetscFunctionReturn(0); 2222 } 2223 2224 /* check whether perm is the identity mapping */ 2225 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2226 2227 /* special case that simply copies fill pattern */ 2228 if (!levels && perm_identity) { 2229 a->permute = PETSC_FALSE; 2230 /* reuse the column pointers and row offsets for memory savings */ 2231 ui = a->i; 2232 uj = a->j; 2233 free_ij = PETSC_FALSE; 2234 ratio_needed = 1.0; 2235 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2236 if (perm_identity){ 2237 a->permute = PETSC_FALSE; 2238 ai = a->i; aj = a->j; 2239 } else { /* non-trivial permutation */ 2240 a->permute = PETSC_TRUE; 2241 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 2242 ai = a->inew; aj = a->jnew; 2243 } 2244 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2245 2246 /* initialization */ 2247 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2248 ui[0] = 0; 2249 2250 /* jl: linked list for storing indices of the pivot rows 2251 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2252 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&il,am,PetscInt,&jl);CHKERRQ(ierr); 2253 for (i=0; i<am; i++){ 2254 jl[i] = am; il[i] = 0; 2255 } 2256 2257 /* create and initialize a linked list for storing column indices of the active row k */ 2258 nlnk = am + 1; 2259 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2260 2261 /* initial FreeSpace size is fill*(ai[am]+1) */ 2262 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2263 current_space = free_space; 2264 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2265 current_space_lvl = free_space_lvl; 2266 2267 for (k=0; k<am; k++){ /* for each active row k */ 2268 /* initialize lnk by the column indices of row rip[k] */ 2269 nzk = 0; 2270 ncols = ai[rip[k]+1] - ai[rip[k]]; 2271 cols = aj+ai[rip[k]]; 2272 ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2273 nzk += nlnk; 2274 2275 /* update lnk by computing fill-in for each pivot row to be merged in */ 2276 prow = jl[k]; /* 1st pivot row */ 2277 2278 while (prow < k){ 2279 nextprow = jl[prow]; 2280 2281 /* merge prow into k-th row */ 2282 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2283 jmax = ui[prow+1]; 2284 ncols = jmax-jmin; 2285 i = jmin - ui[prow]; 2286 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2287 j = *(uj_lvl_ptr[prow] + i - 1); 2288 cols_lvl = uj_lvl_ptr[prow]+i; 2289 ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2290 nzk += nlnk; 2291 2292 /* update il and jl for prow */ 2293 if (jmin < jmax){ 2294 il[prow] = jmin; 2295 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2296 } 2297 prow = nextprow; 2298 } 2299 2300 /* if free space is not available, make more free space */ 2301 if (current_space->local_remaining<nzk) { 2302 i = am - k + 1; /* num of unfactored rows */ 2303 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2304 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2305 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2306 reallocs++; 2307 } 2308 2309 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2310 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2311 2312 /* add the k-th row into il and jl */ 2313 if (nzk-1 > 0){ 2314 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2315 jl[k] = jl[i]; jl[i] = k; 2316 il[k] = ui[k] + 1; 2317 } 2318 uj_ptr[k] = current_space->array; 2319 uj_lvl_ptr[k] = current_space_lvl->array; 2320 2321 current_space->array += nzk; 2322 current_space->local_used += nzk; 2323 current_space->local_remaining -= nzk; 2324 current_space_lvl->array += nzk; 2325 current_space_lvl->local_used += nzk; 2326 current_space_lvl->local_remaining -= nzk; 2327 2328 ui[k+1] = ui[k] + nzk; 2329 } 2330 2331 #if defined(PETSC_USE_INFO) 2332 if (ai[am] != 0) { 2333 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2334 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2335 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2336 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2337 } else { 2338 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2339 } 2340 #endif 2341 2342 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2343 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,il,jl);CHKERRQ(ierr); 2344 2345 /* destroy list of free space and other temporary array(s) */ 2346 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2347 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2348 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2349 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2350 if (ai[am] != 0) { 2351 ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2352 } else { 2353 ratio_needed = 0.0; 2354 } 2355 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2356 2357 /* put together the new matrix in MATSEQSBAIJ format */ 2358 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2359 2360 b = (Mat_SeqSBAIJ*)(fact)->data; 2361 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2362 b->singlemalloc = PETSC_FALSE; 2363 b->free_a = PETSC_TRUE; 2364 b->free_ij = free_ij; 2365 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2366 b->j = uj; 2367 b->i = ui; 2368 b->diag = 0; 2369 b->ilen = 0; 2370 b->imax = 0; 2371 b->row = perm; 2372 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2373 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2374 b->icol = perm; 2375 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2376 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2377 b->maxnz = b->nz = ui[am]; 2378 2379 (fact)->info.factor_mallocs = reallocs; 2380 (fact)->info.fill_ratio_given = fill; 2381 (fact)->info.fill_ratio_needed = ratio_needed; 2382 ierr = MatSeqSBAIJSetNumericFactorization(fact,perm_identity);CHKERRQ(ierr); 2383 PetscFunctionReturn(0); 2384 } 2385 2386