1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for SBAIJ format. 5 */ 6 7 #include "../src/mat/impls/sbaij/seq/sbaij.h" 8 #include "../src/mat/impls/baij/seq/baij.h" 9 #include "../src/mat/blockinvert.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_inplace" 13 PetscErrorCode MatSolve_SeqSBAIJ_N_inplace(Mat A,Vec bb,Vec xx) 14 { 15 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 16 IS isrow=a->row; 17 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 18 PetscErrorCode ierr; 19 const PetscInt *r; 20 PetscInt nz,*vj,k,idx,k1; 21 PetscInt bs=A->rmap->bs,bs2 = a->bs2; 22 MatScalar *aa=a->a,*v,*diag; 23 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 24 25 PetscFunctionBegin; 26 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 27 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 28 t = a->solve_work; 29 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 30 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 31 32 /* solve U^T * D * y = b by forward substitution */ 33 xk = t; 34 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 35 idx = bs*r[k]; 36 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 37 } 38 for (k=0; k<mbs; k++){ 39 v = aa + bs2*ai[k]; 40 xk = t + k*bs; /* Dk*xk = k-th block of x */ 41 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 42 nz = ai[k+1] - ai[k]; 43 vj = aj + ai[k]; 44 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 45 while (nz--) { 46 /* x(:) += U(k,:)^T*(Dk*xk) */ 47 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 48 vj++; xj = t + (*vj)*bs; 49 v += bs2; 50 } 51 /* xk = inv(Dk)*(Dk*xk) */ 52 diag = aa+k*bs2; /* ptr to inv(Dk) */ 53 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 54 } 55 56 /* solve U*x = y by back substitution */ 57 for (k=mbs-1; k>=0; k--){ 58 v = aa + bs2*ai[k]; 59 xk = t + k*bs; /* xk */ 60 nz = ai[k+1] - ai[k]; 61 vj = aj + ai[k]; 62 xj = t + (*vj)*bs; 63 while (nz--) { 64 /* xk += U(k,:)*x(:) */ 65 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 66 vj++; 67 v += bs2; xj = t + (*vj)*bs; 68 } 69 idx = bs*r[k]; 70 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 71 } 72 73 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 74 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 75 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 76 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 77 ierr = PetscLogFlops(bs2*(2.0*a->nz + mbs));CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_inplace" 83 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_inplace(Mat A,Vec bb,Vec xx) 84 { 85 PetscFunctionBegin; 86 SETERRQ(1,"not implemented yet"); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_inplace" 92 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_inplace(Mat A,Vec bb,Vec xx) 93 { 94 PetscFunctionBegin; 95 SETERRQ(1,"not implemented yet"); 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 101 PetscErrorCode ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 102 { 103 PetscErrorCode ierr; 104 PetscInt nz,*vj,k; 105 PetscInt bs2 = bs*bs; 106 MatScalar *v,*diag; 107 PetscScalar *xk,*xj,*xk_tmp; 108 109 PetscFunctionBegin; 110 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 111 for (k=0; k<mbs; k++){ 112 v = aa + bs2*ai[k]; 113 xk = x + k*bs; /* Dk*xk = k-th block of x */ 114 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 115 nz = ai[k+1] - ai[k]; 116 vj = aj + ai[k]; 117 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 118 while (nz--) { 119 /* x(:) += U(k,:)^T*(Dk*xk) */ 120 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 121 vj++; xj = x + (*vj)*bs; 122 v += bs2; 123 } 124 /* xk = inv(Dk)*(Dk*xk) */ 125 diag = aa+k*bs2; /* ptr to inv(Dk) */ 126 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 127 } 128 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 134 PetscErrorCode BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 135 { 136 PetscInt nz,*vj,k; 137 PetscInt bs2 = bs*bs; 138 MatScalar *v; 139 PetscScalar *xk,*xj; 140 141 PetscFunctionBegin; 142 for (k=mbs-1; k>=0; k--){ 143 v = aa + bs2*ai[k]; 144 xk = x + k*bs; /* xk */ 145 nz = ai[k+1] - ai[k]; 146 vj = aj + ai[k]; 147 xj = x + (*vj)*bs; 148 while (nz--) { 149 /* xk += U(k,:)*x(:) */ 150 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 151 vj++; 152 v += bs2; xj = x + (*vj)*bs; 153 } 154 } 155 PetscFunctionReturn(0); 156 } 157 158 #undef __FUNCT__ 159 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace" 160 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 161 { 162 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 163 PetscErrorCode ierr; 164 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 165 PetscInt bs=A->rmap->bs; 166 MatScalar *aa=a->a; 167 PetscScalar *x,*b; 168 #if defined(PETSC_USE_LOG) 169 PetscInt bs2 = a->bs2; 170 #endif 171 172 PetscFunctionBegin; 173 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 175 176 /* solve U^T * D * y = b by forward substitution */ 177 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 178 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 179 180 /* solve U*x = y by back substitution */ 181 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 182 183 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 184 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 185 ierr = PetscLogFlops(bs2*(2.0*a->nz + mbs));CHKERRQ(ierr); 186 PetscFunctionReturn(0); 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace" 191 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 192 { 193 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 194 PetscErrorCode ierr; 195 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 196 PetscInt bs=A->rmap->bs; 197 MatScalar *aa=a->a; 198 PetscScalar *x,*b; 199 #if defined(PETSC_USE_LOG) 200 PetscInt bs2 = a->bs2; 201 #endif 202 203 PetscFunctionBegin; 204 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 205 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 206 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 207 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 208 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 209 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 210 ierr = PetscLogFlops(bs2*a->nz + A->rmap->N);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace" 216 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 217 { 218 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 219 PetscErrorCode ierr; 220 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 221 PetscInt bs=A->rmap->bs; 222 MatScalar *aa=a->a; 223 PetscScalar *x,*b; 224 #if defined(PETSC_USE_LOG) 225 PetscInt bs2 = a->bs2; 226 #endif 227 228 PetscFunctionBegin; 229 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 230 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 231 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 232 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 233 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 234 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 235 ierr = PetscLogFlops(bs2*a->nz);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_inplace" 241 PetscErrorCode MatSolve_SeqSBAIJ_7_inplace(Mat A,Vec bb,Vec xx) 242 { 243 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 244 IS isrow=a->row; 245 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 246 PetscErrorCode ierr; 247 const PetscInt *r; 248 PetscInt nz,*vj,k,idx; 249 MatScalar *aa=a->a,*v,*d; 250 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 251 252 PetscFunctionBegin; 253 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 254 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 255 t = a->solve_work; 256 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 257 258 /* solve U^T * D * y = b by forward substitution */ 259 tp = t; 260 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 261 idx = 7*r[k]; 262 tp[0] = b[idx]; 263 tp[1] = b[idx+1]; 264 tp[2] = b[idx+2]; 265 tp[3] = b[idx+3]; 266 tp[4] = b[idx+4]; 267 tp[5] = b[idx+5]; 268 tp[6] = b[idx+6]; 269 tp += 7; 270 } 271 272 for (k=0; k<mbs; k++){ 273 v = aa + 49*ai[k]; 274 vj = aj + ai[k]; 275 tp = t + k*7; 276 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 277 nz = ai[k+1] - ai[k]; 278 tp = t + (*vj)*7; 279 while (nz--) { 280 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 281 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 282 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 283 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 284 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 285 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 286 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 287 vj++; tp = t + (*vj)*7; 288 v += 49; 289 } 290 291 /* xk = inv(Dk)*(Dk*xk) */ 292 d = aa+k*49; /* ptr to inv(Dk) */ 293 tp = t + k*7; 294 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 295 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 296 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 297 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 298 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 299 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 300 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 301 } 302 303 /* solve U*x = y by back substitution */ 304 for (k=mbs-1; k>=0; k--){ 305 v = aa + 49*ai[k]; 306 vj = aj + ai[k]; 307 tp = t + k*7; 308 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 309 nz = ai[k+1] - ai[k]; 310 311 tp = t + (*vj)*7; 312 while (nz--) { 313 /* xk += U(k,:)*x(:) */ 314 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 315 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 316 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 317 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 318 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 319 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 320 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 321 vj++; tp = t + (*vj)*7; 322 v += 49; 323 } 324 tp = t + k*7; 325 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 326 idx = 7*r[k]; 327 x[idx] = x0; 328 x[idx+1] = x1; 329 x[idx+2] = x2; 330 x[idx+3] = x3; 331 x[idx+4] = x4; 332 x[idx+5] = x5; 333 x[idx+6] = x6; 334 } 335 336 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 337 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 338 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 339 ierr = PetscLogFlops(49.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 340 PetscFunctionReturn(0); 341 } 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 345 PetscErrorCode ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 346 { 347 MatScalar *v,*d; 348 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 349 PetscInt nz,*vj,k; 350 351 PetscFunctionBegin; 352 for (k=0; k<mbs; k++){ 353 v = aa + 49*ai[k]; 354 xp = x + k*7; 355 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 356 nz = ai[k+1] - ai[k]; 357 vj = aj + ai[k]; 358 xp = x + (*vj)*7; 359 while (nz--) { 360 /* x(:) += U(k,:)^T*(Dk*xk) */ 361 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 362 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 363 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 364 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 365 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 366 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 367 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 368 vj++; xp = x + (*vj)*7; 369 v += 49; 370 } 371 /* xk = inv(Dk)*(Dk*xk) */ 372 d = aa+k*49; /* ptr to inv(Dk) */ 373 xp = x + k*7; 374 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 375 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 376 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 377 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 378 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 379 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 380 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 381 } 382 PetscFunctionReturn(0); 383 } 384 385 #undef __FUNCT__ 386 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 387 PetscErrorCode BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 388 { 389 MatScalar *v; 390 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 391 PetscInt nz,*vj,k; 392 393 PetscFunctionBegin; 394 for (k=mbs-1; k>=0; k--){ 395 v = aa + 49*ai[k]; 396 xp = x + k*7; 397 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 398 nz = ai[k+1] - ai[k]; 399 vj = aj + ai[k]; 400 xp = x + (*vj)*7; 401 while (nz--) { 402 /* xk += U(k,:)*x(:) */ 403 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 404 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 405 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 406 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 407 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 408 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 409 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 410 vj++; 411 v += 49; xp = x + (*vj)*7; 412 } 413 xp = x + k*7; 414 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 415 } 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering_inplace" 421 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 422 { 423 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 424 PetscErrorCode ierr; 425 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 426 MatScalar *aa=a->a; 427 PetscScalar *x,*b; 428 429 PetscFunctionBegin; 430 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 431 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 432 433 /* solve U^T * D * y = b by forward substitution */ 434 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 435 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 436 437 /* solve U*x = y by back substitution */ 438 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 439 440 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 441 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 442 ierr = PetscLogFlops(49.0*(2*a->nz + mbs));CHKERRQ(ierr); 443 PetscFunctionReturn(0); 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace" 448 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 449 { 450 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 451 PetscErrorCode ierr; 452 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 453 MatScalar *aa=a->a; 454 PetscScalar *x,*b; 455 456 PetscFunctionBegin; 457 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 458 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 459 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 460 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 461 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 462 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 463 ierr = PetscLogFlops(49.0*a->nz + mbs);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 #undef __FUNCT__ 468 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace" 469 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 470 { 471 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 472 PetscErrorCode ierr; 473 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 474 MatScalar *aa=a->a; 475 PetscScalar *x,*b; 476 477 PetscFunctionBegin; 478 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 479 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 480 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 481 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 482 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 483 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 484 ierr = PetscLogFlops(49.0*a->nz);CHKERRQ(ierr); 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNCT__ 489 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_inplace" 490 PetscErrorCode MatSolve_SeqSBAIJ_6_inplace(Mat A,Vec bb,Vec xx) 491 { 492 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 493 IS isrow=a->row; 494 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 495 PetscErrorCode ierr; 496 const PetscInt *r; 497 PetscInt nz,*vj,k,idx; 498 MatScalar *aa=a->a,*v,*d; 499 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 500 501 PetscFunctionBegin; 502 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 503 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 504 t = a->solve_work; 505 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 506 507 /* solve U^T * D * y = b by forward substitution */ 508 tp = t; 509 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 510 idx = 6*r[k]; 511 tp[0] = b[idx]; 512 tp[1] = b[idx+1]; 513 tp[2] = b[idx+2]; 514 tp[3] = b[idx+3]; 515 tp[4] = b[idx+4]; 516 tp[5] = b[idx+5]; 517 tp += 6; 518 } 519 520 for (k=0; k<mbs; k++){ 521 v = aa + 36*ai[k]; 522 vj = aj + ai[k]; 523 tp = t + k*6; 524 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 525 nz = ai[k+1] - ai[k]; 526 tp = t + (*vj)*6; 527 while (nz--) { 528 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 529 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 530 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 531 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 532 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 533 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 534 vj++; tp = t + (*vj)*6; 535 v += 36; 536 } 537 538 /* xk = inv(Dk)*(Dk*xk) */ 539 d = aa+k*36; /* ptr to inv(Dk) */ 540 tp = t + k*6; 541 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 542 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 543 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 544 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 545 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 546 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 547 } 548 549 /* solve U*x = y by back substitution */ 550 for (k=mbs-1; k>=0; k--){ 551 v = aa + 36*ai[k]; 552 vj = aj + ai[k]; 553 tp = t + k*6; 554 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 555 nz = ai[k+1] - ai[k]; 556 557 tp = t + (*vj)*6; 558 while (nz--) { 559 /* xk += U(k,:)*x(:) */ 560 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 561 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 562 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 563 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 564 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 565 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 566 vj++; tp = t + (*vj)*6; 567 v += 36; 568 } 569 tp = t + k*6; 570 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 571 idx = 6*r[k]; 572 x[idx] = x0; 573 x[idx+1] = x1; 574 x[idx+2] = x2; 575 x[idx+3] = x3; 576 x[idx+4] = x4; 577 x[idx+5] = x5; 578 } 579 580 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 581 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 582 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 583 ierr = PetscLogFlops(36.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 584 PetscFunctionReturn(0); 585 } 586 587 #undef __FUNCT__ 588 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 589 PetscErrorCode ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 590 { 591 MatScalar *v,*d; 592 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 593 PetscInt nz,*vj,k; 594 595 PetscFunctionBegin; 596 for (k=0; k<mbs; k++){ 597 v = aa + 36*ai[k]; 598 xp = x + k*6; 599 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 600 nz = ai[k+1] - ai[k]; 601 vj = aj + ai[k]; 602 xp = x + (*vj)*6; 603 while (nz--) { 604 /* x(:) += U(k,:)^T*(Dk*xk) */ 605 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 606 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 607 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 608 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 609 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 610 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 611 vj++; xp = x + (*vj)*6; 612 v += 36; 613 } 614 /* xk = inv(Dk)*(Dk*xk) */ 615 d = aa+k*36; /* ptr to inv(Dk) */ 616 xp = x + k*6; 617 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 618 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 619 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 620 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 621 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 622 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 623 } 624 PetscFunctionReturn(0); 625 } 626 #undef __FUNCT__ 627 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 628 PetscErrorCode BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 629 { 630 MatScalar *v; 631 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 632 PetscInt nz,*vj,k; 633 634 PetscFunctionBegin; 635 for (k=mbs-1; k>=0; k--){ 636 v = aa + 36*ai[k]; 637 xp = x + k*6; 638 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 639 nz = ai[k+1] - ai[k]; 640 vj = aj + ai[k]; 641 xp = x + (*vj)*6; 642 while (nz--) { 643 /* xk += U(k,:)*x(:) */ 644 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 645 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 646 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 647 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 648 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 649 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 650 vj++; 651 v += 36; xp = x + (*vj)*6; 652 } 653 xp = x + k*6; 654 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 655 } 656 PetscFunctionReturn(0); 657 } 658 659 660 #undef __FUNCT__ 661 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering_inplace" 662 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 663 { 664 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 665 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 666 MatScalar *aa=a->a; 667 PetscScalar *x,*b; 668 PetscErrorCode ierr; 669 670 PetscFunctionBegin; 671 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 672 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 673 674 /* solve U^T * D * y = b by forward substitution */ 675 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 676 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 677 678 /* solve U*x = y by back substitution */ 679 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 680 681 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 682 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 683 ierr = PetscLogFlops(36.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 684 PetscFunctionReturn(0); 685 } 686 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace" 689 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 690 { 691 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 692 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 693 MatScalar *aa=a->a; 694 PetscScalar *x,*b; 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 699 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 700 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 701 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 702 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 703 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 704 ierr = PetscLogFlops(36.0*a->nz + mbs);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace" 710 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 711 { 712 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 713 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 714 MatScalar *aa=a->a; 715 PetscScalar *x,*b; 716 PetscErrorCode ierr; 717 718 PetscFunctionBegin; 719 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 720 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 721 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 722 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 723 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 724 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 725 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_inplace" 731 PetscErrorCode MatSolve_SeqSBAIJ_5_inplace(Mat A,Vec bb,Vec xx) 732 { 733 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 734 IS isrow=a->row; 735 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 736 PetscErrorCode ierr; 737 const PetscInt *r; 738 PetscInt nz,*vj,k,idx; 739 MatScalar *aa=a->a,*v,*diag; 740 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 741 742 PetscFunctionBegin; 743 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 744 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 745 t = a->solve_work; 746 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 747 748 /* solve U^T * D * y = b by forward substitution */ 749 tp = t; 750 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 751 idx = 5*r[k]; 752 tp[0] = b[idx]; 753 tp[1] = b[idx+1]; 754 tp[2] = b[idx+2]; 755 tp[3] = b[idx+3]; 756 tp[4] = b[idx+4]; 757 tp += 5; 758 } 759 760 for (k=0; k<mbs; k++){ 761 v = aa + 25*ai[k]; 762 vj = aj + ai[k]; 763 tp = t + k*5; 764 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 765 nz = ai[k+1] - ai[k]; 766 767 tp = t + (*vj)*5; 768 while (nz--) { 769 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 770 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 771 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 772 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 773 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 774 vj++; tp = t + (*vj)*5; 775 v += 25; 776 } 777 778 /* xk = inv(Dk)*(Dk*xk) */ 779 diag = aa+k*25; /* ptr to inv(Dk) */ 780 tp = t + k*5; 781 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 782 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 783 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 784 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 785 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 786 } 787 788 /* solve U*x = y by back substitution */ 789 for (k=mbs-1; k>=0; k--){ 790 v = aa + 25*ai[k]; 791 vj = aj + ai[k]; 792 tp = t + k*5; 793 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 794 nz = ai[k+1] - ai[k]; 795 796 tp = t + (*vj)*5; 797 while (nz--) { 798 /* xk += U(k,:)*x(:) */ 799 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 800 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 801 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 802 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 803 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 804 vj++; tp = t + (*vj)*5; 805 v += 25; 806 } 807 tp = t + k*5; 808 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 809 idx = 5*r[k]; 810 x[idx] = x0; 811 x[idx+1] = x1; 812 x[idx+2] = x2; 813 x[idx+3] = x3; 814 x[idx+4] = x4; 815 } 816 817 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 818 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 819 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 820 ierr = PetscLogFlops(25.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 826 PetscErrorCode ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 827 { 828 MatScalar *v,*diag; 829 PetscScalar *xp,x0,x1,x2,x3,x4; 830 PetscInt nz,*vj,k; 831 832 PetscFunctionBegin; 833 for (k=0; k<mbs; k++){ 834 v = aa + 25*ai[k]; 835 xp = x + k*5; 836 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 837 nz = ai[k+1] - ai[k]; 838 vj = aj + ai[k]; 839 xp = x + (*vj)*5; 840 while (nz--) { 841 /* x(:) += U(k,:)^T*(Dk*xk) */ 842 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 843 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 844 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 845 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 846 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 847 vj++; xp = x + (*vj)*5; 848 v += 25; 849 } 850 /* xk = inv(Dk)*(Dk*xk) */ 851 diag = aa+k*25; /* ptr to inv(Dk) */ 852 xp = x + k*5; 853 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 854 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 855 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 856 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 857 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 858 } 859 PetscFunctionReturn(0); 860 } 861 862 #undef __FUNCT__ 863 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 864 PetscErrorCode BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 865 { 866 MatScalar *v; 867 PetscScalar *xp,x0,x1,x2,x3,x4; 868 PetscInt nz,*vj,k; 869 870 PetscFunctionBegin; 871 for (k=mbs-1; k>=0; k--){ 872 v = aa + 25*ai[k]; 873 xp = x + k*5; 874 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 875 nz = ai[k+1] - ai[k]; 876 vj = aj + ai[k]; 877 xp = x + (*vj)*5; 878 while (nz--) { 879 /* xk += U(k,:)*x(:) */ 880 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 881 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 882 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 883 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 884 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 885 vj++; 886 v += 25; xp = x + (*vj)*5; 887 } 888 xp = x + k*5; 889 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 890 } 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering_inplace" 896 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 897 { 898 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 899 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 900 MatScalar *aa=a->a; 901 PetscScalar *x,*b; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 906 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 907 908 /* solve U^T * D * y = b by forward substitution */ 909 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 910 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 911 912 /* solve U*x = y by back substitution */ 913 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 914 915 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 916 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 917 ierr = PetscLogFlops(25.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 918 PetscFunctionReturn(0); 919 } 920 921 #undef __FUNCT__ 922 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace" 923 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 924 { 925 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 926 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 927 MatScalar *aa=a->a; 928 PetscScalar *x,*b; 929 PetscErrorCode ierr; 930 931 PetscFunctionBegin; 932 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 933 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 934 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 935 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 936 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 937 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 938 ierr = PetscLogFlops(25.0*(a->nz + mbs));CHKERRQ(ierr); 939 PetscFunctionReturn(0); 940 } 941 942 #undef __FUNCT__ 943 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace" 944 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 945 { 946 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 947 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 948 MatScalar *aa=a->a; 949 PetscScalar *x,*b; 950 PetscErrorCode ierr; 951 952 PetscFunctionBegin; 953 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 954 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 955 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 956 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 957 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 958 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 959 ierr = PetscLogFlops(25.0*a->nz);CHKERRQ(ierr); 960 PetscFunctionReturn(0); 961 } 962 963 #undef __FUNCT__ 964 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_inplace" 965 PetscErrorCode MatSolve_SeqSBAIJ_4_inplace(Mat A,Vec bb,Vec xx) 966 { 967 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 968 IS isrow=a->row; 969 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 970 PetscErrorCode ierr; 971 const PetscInt *r; 972 PetscInt nz,*vj,k,idx; 973 MatScalar *aa=a->a,*v,*diag; 974 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 975 976 PetscFunctionBegin; 977 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 978 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 979 t = a->solve_work; 980 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 981 982 /* solve U^T * D * y = b by forward substitution */ 983 tp = t; 984 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 985 idx = 4*r[k]; 986 tp[0] = b[idx]; 987 tp[1] = b[idx+1]; 988 tp[2] = b[idx+2]; 989 tp[3] = b[idx+3]; 990 tp += 4; 991 } 992 993 for (k=0; k<mbs; k++){ 994 v = aa + 16*ai[k]; 995 vj = aj + ai[k]; 996 tp = t + k*4; 997 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 998 nz = ai[k+1] - ai[k]; 999 1000 tp = t + (*vj)*4; 1001 while (nz--) { 1002 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 1003 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1004 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1005 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1006 vj++; tp = t + (*vj)*4; 1007 v += 16; 1008 } 1009 1010 /* xk = inv(Dk)*(Dk*xk) */ 1011 diag = aa+k*16; /* ptr to inv(Dk) */ 1012 tp = t + k*4; 1013 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1014 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1015 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1016 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1017 } 1018 1019 /* solve U*x = y by back substitution */ 1020 for (k=mbs-1; k>=0; k--){ 1021 v = aa + 16*ai[k]; 1022 vj = aj + ai[k]; 1023 tp = t + k*4; 1024 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 1025 nz = ai[k+1] - ai[k]; 1026 1027 tp = t + (*vj)*4; 1028 while (nz--) { 1029 /* xk += U(k,:)*x(:) */ 1030 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 1031 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 1032 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 1033 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 1034 vj++; tp = t + (*vj)*4; 1035 v += 16; 1036 } 1037 tp = t + k*4; 1038 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 1039 idx = 4*r[k]; 1040 x[idx] = x0; 1041 x[idx+1] = x1; 1042 x[idx+2] = x2; 1043 x[idx+3] = x3; 1044 } 1045 1046 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1047 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1048 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1049 ierr = PetscLogFlops(16.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1050 PetscFunctionReturn(0); 1051 } 1052 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1055 PetscErrorCode ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1056 { 1057 MatScalar *v,*diag; 1058 PetscScalar *xp,x0,x1,x2,x3; 1059 PetscInt nz,*vj,k; 1060 1061 PetscFunctionBegin; 1062 for (k=0; k<mbs; k++){ 1063 v = aa + 16*ai[k]; 1064 xp = x + k*4; 1065 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 1066 nz = ai[k+1] - ai[k]; 1067 vj = aj + ai[k]; 1068 xp = x + (*vj)*4; 1069 while (nz--) { 1070 /* x(:) += U(k,:)^T*(Dk*xk) */ 1071 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 1072 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1073 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1074 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1075 vj++; xp = x + (*vj)*4; 1076 v += 16; 1077 } 1078 /* xk = inv(Dk)*(Dk*xk) */ 1079 diag = aa+k*16; /* ptr to inv(Dk) */ 1080 xp = x + k*4; 1081 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1082 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1083 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1084 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1085 } 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1091 PetscErrorCode BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1092 { 1093 MatScalar *v; 1094 PetscScalar *xp,x0,x1,x2,x3; 1095 PetscInt nz,*vj,k; 1096 1097 PetscFunctionBegin; 1098 for (k=mbs-1; k>=0; k--){ 1099 v = aa + 16*ai[k]; 1100 xp = x + k*4; 1101 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 1102 nz = ai[k+1] - ai[k]; 1103 vj = aj + ai[k]; 1104 xp = x + (*vj)*4; 1105 while (nz--) { 1106 /* xk += U(k,:)*x(:) */ 1107 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 1108 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 1109 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 1110 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 1111 vj++; 1112 v += 16; xp = x + (*vj)*4; 1113 } 1114 xp = x + k*4; 1115 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 1116 } 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering_inplace" 1122 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1123 { 1124 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1125 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1126 MatScalar *aa=a->a; 1127 PetscScalar *x,*b; 1128 PetscErrorCode ierr; 1129 1130 PetscFunctionBegin; 1131 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1132 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1133 1134 /* solve U^T * D * y = b by forward substitution */ 1135 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1136 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1137 1138 /* solve U*x = y by back substitution */ 1139 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1140 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1141 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1142 ierr = PetscLogFlops(16.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace" 1148 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1149 { 1150 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1151 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1152 MatScalar *aa=a->a; 1153 PetscScalar *x,*b; 1154 PetscErrorCode ierr; 1155 1156 PetscFunctionBegin; 1157 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1158 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1159 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1160 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1161 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1162 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1163 ierr = PetscLogFlops(16.0*a->nz + mbs);CHKERRQ(ierr); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace" 1169 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1170 { 1171 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1172 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1173 MatScalar *aa=a->a; 1174 PetscScalar *x,*b; 1175 PetscErrorCode ierr; 1176 1177 PetscFunctionBegin; 1178 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1179 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1180 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1181 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1182 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1183 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1184 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_inplace" 1190 PetscErrorCode MatSolve_SeqSBAIJ_3_inplace(Mat A,Vec bb,Vec xx) 1191 { 1192 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1193 IS isrow=a->row; 1194 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1195 PetscErrorCode ierr; 1196 const PetscInt *r; 1197 PetscInt nz,*vj,k,idx; 1198 MatScalar *aa=a->a,*v,*diag; 1199 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 1200 1201 PetscFunctionBegin; 1202 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1203 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1204 t = a->solve_work; 1205 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1206 1207 /* solve U^T * D * y = b by forward substitution */ 1208 tp = t; 1209 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1210 idx = 3*r[k]; 1211 tp[0] = b[idx]; 1212 tp[1] = b[idx+1]; 1213 tp[2] = b[idx+2]; 1214 tp += 3; 1215 } 1216 1217 for (k=0; k<mbs; k++){ 1218 v = aa + 9*ai[k]; 1219 vj = aj + ai[k]; 1220 tp = t + k*3; 1221 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 1222 nz = ai[k+1] - ai[k]; 1223 1224 tp = t + (*vj)*3; 1225 while (nz--) { 1226 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1227 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1228 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1229 vj++; tp = t + (*vj)*3; 1230 v += 9; 1231 } 1232 1233 /* xk = inv(Dk)*(Dk*xk) */ 1234 diag = aa+k*9; /* ptr to inv(Dk) */ 1235 tp = t + k*3; 1236 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1237 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1238 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1239 } 1240 1241 /* solve U*x = y by back substitution */ 1242 for (k=mbs-1; k>=0; k--){ 1243 v = aa + 9*ai[k]; 1244 vj = aj + ai[k]; 1245 tp = t + k*3; 1246 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 1247 nz = ai[k+1] - ai[k]; 1248 1249 tp = t + (*vj)*3; 1250 while (nz--) { 1251 /* xk += U(k,:)*x(:) */ 1252 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 1253 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 1254 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 1255 vj++; tp = t + (*vj)*3; 1256 v += 9; 1257 } 1258 tp = t + k*3; 1259 tp[0] = x0; tp[1] = x1; tp[2] = x2; 1260 idx = 3*r[k]; 1261 x[idx] = x0; 1262 x[idx+1] = x1; 1263 x[idx+2] = x2; 1264 } 1265 1266 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1267 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1268 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1269 ierr = PetscLogFlops(9.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1270 PetscFunctionReturn(0); 1271 } 1272 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1275 PetscErrorCode ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1276 { 1277 MatScalar *v,*diag; 1278 PetscScalar *xp,x0,x1,x2; 1279 PetscInt nz,*vj,k; 1280 1281 PetscFunctionBegin; 1282 for (k=0; k<mbs; k++){ 1283 v = aa + 9*ai[k]; 1284 xp = x + k*3; 1285 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 1286 nz = ai[k+1] - ai[k]; 1287 vj = aj + ai[k]; 1288 xp = x + (*vj)*3; 1289 while (nz--) { 1290 /* x(:) += U(k,:)^T*(Dk*xk) */ 1291 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1292 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1293 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1294 vj++; xp = x + (*vj)*3; 1295 v += 9; 1296 } 1297 /* xk = inv(Dk)*(Dk*xk) */ 1298 diag = aa+k*9; /* ptr to inv(Dk) */ 1299 xp = x + k*3; 1300 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1301 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1302 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1303 } 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1309 PetscErrorCode BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1310 { 1311 MatScalar *v; 1312 PetscScalar *xp,x0,x1,x2; 1313 PetscInt nz,*vj,k; 1314 1315 PetscFunctionBegin; 1316 for (k=mbs-1; k>=0; k--){ 1317 v = aa + 9*ai[k]; 1318 xp = x + k*3; 1319 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 1320 nz = ai[k+1] - ai[k]; 1321 vj = aj + ai[k]; 1322 xp = x + (*vj)*3; 1323 while (nz--) { 1324 /* xk += U(k,:)*x(:) */ 1325 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 1326 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 1327 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 1328 vj++; 1329 v += 9; xp = x + (*vj)*3; 1330 } 1331 xp = x + k*3; 1332 xp[0] = x0; xp[1] = x1; xp[2] = x2; 1333 } 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering_inplace" 1339 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1340 { 1341 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1342 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1343 MatScalar *aa=a->a; 1344 PetscScalar *x,*b; 1345 PetscErrorCode ierr; 1346 1347 PetscFunctionBegin; 1348 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1349 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1350 1351 /* solve U^T * D * y = b by forward substitution */ 1352 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1353 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1354 1355 /* solve U*x = y by back substitution */ 1356 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1357 1358 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1359 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1360 ierr = PetscLogFlops(9.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace" 1366 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1367 { 1368 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1369 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1370 MatScalar *aa=a->a; 1371 PetscScalar *x,*b; 1372 PetscErrorCode ierr; 1373 1374 PetscFunctionBegin; 1375 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1376 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1377 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1378 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1379 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1380 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1381 ierr = PetscLogFlops(9.0*(a->nz + mbs));CHKERRQ(ierr); 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace" 1387 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1388 { 1389 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1390 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1391 MatScalar *aa=a->a; 1392 PetscScalar *x,*b; 1393 PetscErrorCode ierr; 1394 1395 PetscFunctionBegin; 1396 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1397 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1398 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1399 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1400 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1401 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1402 ierr = PetscLogFlops(9.0*a->nz);CHKERRQ(ierr); 1403 PetscFunctionReturn(0); 1404 } 1405 1406 #undef __FUNCT__ 1407 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_inplace" 1408 PetscErrorCode MatSolve_SeqSBAIJ_2_inplace(Mat A,Vec bb,Vec xx) 1409 { 1410 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 1411 IS isrow=a->row; 1412 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1413 PetscErrorCode ierr; 1414 const PetscInt *r; 1415 PetscInt nz,*vj,k,k2,idx; 1416 MatScalar *aa=a->a,*v,*diag; 1417 PetscScalar *x,*b,x0,x1,*t; 1418 1419 PetscFunctionBegin; 1420 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1421 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1422 t = a->solve_work; 1423 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1424 1425 /* solve U^T * D * y = perm(b) by forward substitution */ 1426 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1427 idx = 2*r[k]; 1428 t[k*2] = b[idx]; 1429 t[k*2+1] = b[idx+1]; 1430 } 1431 for (k=0; k<mbs; k++){ 1432 v = aa + 4*ai[k]; 1433 vj = aj + ai[k]; 1434 k2 = k*2; 1435 x0 = t[k2]; x1 = t[k2+1]; 1436 nz = ai[k+1] - ai[k]; 1437 while (nz--) { 1438 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1439 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1440 vj++; v += 4; 1441 } 1442 diag = aa+k*4; /* ptr to inv(Dk) */ 1443 t[k2] = diag[0]*x0 + diag[2]*x1; 1444 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1445 } 1446 1447 /* solve U*x = y by back substitution */ 1448 for (k=mbs-1; k>=0; k--){ 1449 v = aa + 4*ai[k]; 1450 vj = aj + ai[k]; 1451 k2 = k*2; 1452 x0 = t[k2]; x1 = t[k2+1]; 1453 nz = ai[k+1] - ai[k]; 1454 while (nz--) { 1455 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1456 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1457 vj++; v += 4; 1458 } 1459 t[k2] = x0; 1460 t[k2+1] = x1; 1461 idx = 2*r[k]; 1462 x[idx] = x0; 1463 x[idx+1] = x1; 1464 } 1465 1466 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1467 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1468 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1469 ierr = PetscLogFlops(4.0*(2.0*a->nz + mbs));CHKERRQ(ierr); 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1475 PetscErrorCode ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1476 { 1477 MatScalar *v,*diag; 1478 PetscScalar x0,x1; 1479 PetscInt nz,*vj,k,k2; 1480 1481 PetscFunctionBegin; 1482 for (k=0; k<mbs; k++){ 1483 v = aa + 4*ai[k]; 1484 vj = aj + ai[k]; 1485 k2 = k*2; 1486 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1487 nz = ai[k+1] - ai[k]; 1488 1489 while (nz--) { 1490 /* x(:) += U(k,:)^T*(Dk*xk) */ 1491 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1492 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1493 vj++; v += 4; 1494 } 1495 /* xk = inv(Dk)*(Dk*xk) */ 1496 diag = aa+k*4; /* ptr to inv(Dk) */ 1497 x[k2] = diag[0]*x0 + diag[2]*x1; 1498 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1499 } 1500 PetscFunctionReturn(0); 1501 } 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1505 PetscErrorCode BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1506 { 1507 MatScalar *v; 1508 PetscScalar x0,x1; 1509 PetscInt nz,*vj,k,k2; 1510 1511 PetscFunctionBegin; 1512 for (k=mbs-1; k>=0; k--){ 1513 v = aa + 4*ai[k]; 1514 vj = aj + ai[k]; 1515 k2 = k*2; 1516 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1517 nz = ai[k+1] - ai[k]; 1518 while (nz--) { 1519 /* xk += U(k,:)*x(:) */ 1520 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1521 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1522 vj++; v += 4; 1523 } 1524 x[k2] = x0; 1525 x[k2+1] = x1; 1526 } 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace" 1532 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1533 { 1534 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1535 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1536 MatScalar *aa=a->a; 1537 PetscScalar *x,*b; 1538 PetscErrorCode ierr; 1539 1540 PetscFunctionBegin; 1541 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1542 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1543 1544 /* solve U^T * D * y = b by forward substitution */ 1545 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1546 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1547 1548 /* solve U*x = y by back substitution */ 1549 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1550 1551 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1552 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1553 ierr = PetscLogFlops(4.0*(2.0*a->nz + mbs));CHKERRQ(ierr); /* bs2*(2*a->nz + mbs) */ 1554 PetscFunctionReturn(0); 1555 } 1556 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace" 1559 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1560 { 1561 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1562 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1563 MatScalar *aa=a->a; 1564 PetscScalar *x,*b; 1565 PetscErrorCode ierr; 1566 1567 PetscFunctionBegin; 1568 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1569 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1570 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1571 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1572 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1573 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1574 ierr = PetscLogFlops(4.0*(a->nz + mbs));CHKERRQ(ierr); 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace" 1580 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1581 { 1582 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1583 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1584 MatScalar *aa=a->a; 1585 PetscScalar *x,*b; 1586 PetscErrorCode ierr; 1587 1588 PetscFunctionBegin; 1589 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1590 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1591 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1592 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1593 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1594 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1595 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 1596 PetscFunctionReturn(0); 1597 } 1598 1599 #undef __FUNCT__ 1600 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1601 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1602 { 1603 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1604 IS isrow=a->row; 1605 PetscErrorCode ierr; 1606 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj,*adiag = a->diag; 1607 const MatScalar *aa=a->a,*v; 1608 const PetscScalar *b; 1609 PetscScalar *x,xk,*t; 1610 PetscInt nz,k,j; 1611 1612 PetscFunctionBegin; 1613 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1614 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1615 t = a->solve_work; 1616 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1617 1618 /* solve U^T*D^(1/2)*y = perm(b) by forward substitution */ 1619 for (k=0; k<mbs; k++) t[k] = b[rp[k]]; 1620 for (k=0; k<mbs; k++){ 1621 v = aa + ai[k]; 1622 vj = aj + ai[k]; 1623 xk = t[k]; 1624 nz = ai[k+1] - ai[k] - 1; 1625 for (j=0; j<nz; j++) t[vj[j]] += v[j]*xk; 1626 t[k] = xk*v[nz]; /* v[nz] = 1/D(k) */ 1627 } 1628 1629 /* solve U*perm(x) = y by back substitution */ 1630 for (k=mbs-1; k>=0; k--){ 1631 v = aa + adiag[k] - 1; 1632 vj = aj + adiag[k] - 1; 1633 nz = ai[k+1] - ai[k] - 1; 1634 for (j=0; j<nz; j++) t[k] += v[-j]*t[vj[-j]]; 1635 x[rp[k]] = t[k]; 1636 } 1637 1638 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1639 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1640 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1641 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 1642 PetscFunctionReturn(0); 1643 } 1644 1645 #undef __FUNCT__ 1646 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_inplace" 1647 PetscErrorCode MatSolve_SeqSBAIJ_1_inplace(Mat A,Vec bb,Vec xx) 1648 { 1649 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1650 IS isrow=a->row; 1651 PetscErrorCode ierr; 1652 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1653 const MatScalar *aa=a->a,*v; 1654 PetscScalar *x,*b,xk,*t; 1655 PetscInt nz,k; 1656 1657 PetscFunctionBegin; 1658 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1659 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1660 t = a->solve_work; 1661 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1662 1663 /* solve U^T*D^(1/2)*y = perm(b) by forward substitution */ 1664 for (k=0; k<mbs; k++) t[k] = b[rp[k]]; 1665 for (k=0; k<mbs; k++){ 1666 v = aa + ai[k] + 1; 1667 vj = aj + ai[k] + 1; 1668 xk = t[k]; 1669 nz = ai[k+1] - ai[k] - 1; 1670 while (nz--) t[*vj++] += (*v++) * xk; 1671 t[k] = xk*aa[ai[k]]; /* aa[k] = 1/D(k) */ 1672 } 1673 1674 /* solve U*perm(x) = y by back substitution */ 1675 for (k=mbs-1; k>=0; k--){ 1676 v = aa + ai[k] + 1; 1677 vj = aj + ai[k] + 1; 1678 nz = ai[k+1] - ai[k] - 1; 1679 while (nz--) t[k] += (*v++) * t[*vj++]; 1680 x[rp[k]] = t[k]; 1681 } 1682 1683 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1684 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1685 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1686 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 1687 PetscFunctionReturn(0); 1688 } 1689 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1" 1692 PetscErrorCode MatForwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1693 { 1694 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1695 IS isrow=a->row; 1696 PetscErrorCode ierr; 1697 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1698 const MatScalar *aa=a->a,*v; 1699 PetscReal diagk; 1700 PetscScalar *x,*b,xk; 1701 PetscInt nz,k; 1702 1703 PetscFunctionBegin; 1704 SETERRQ(1,"Not done yet"); 1705 /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */ 1706 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1707 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1708 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1709 1710 for (k=0; k<mbs; k++) x[k] = b[rp[k]]; 1711 for (k=0; k<mbs; k++){ 1712 v = aa + ai[k] + 1; 1713 vj = aj + ai[k] + 1; 1714 xk = x[k]; 1715 nz = ai[k+1] - ai[k] - 1; 1716 while (nz--) x[*vj++] += (*v++) * xk; 1717 1718 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1719 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1720 x[k] = xk*sqrt(diagk); 1721 } 1722 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1723 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1724 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1725 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1726 PetscFunctionReturn(0); 1727 } 1728 1729 #undef __FUNCT__ 1730 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_inplace" 1731 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_inplace(Mat A,Vec bb,Vec xx) 1732 { 1733 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1734 IS isrow=a->row; 1735 PetscErrorCode ierr; 1736 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1737 const MatScalar *aa=a->a,*v; 1738 PetscReal diagk; 1739 PetscScalar *x,*b,xk; 1740 PetscInt nz,k; 1741 1742 PetscFunctionBegin; 1743 /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */ 1744 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1745 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1746 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1747 1748 for (k=0; k<mbs; k++) x[k] = b[rp[k]]; 1749 for (k=0; k<mbs; k++){ 1750 v = aa + ai[k] + 1; 1751 vj = aj + ai[k] + 1; 1752 xk = x[k]; 1753 nz = ai[k+1] - ai[k] - 1; 1754 while (nz--) x[*vj++] += (*v++) * xk; 1755 1756 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1757 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1758 x[k] = xk*sqrt(diagk); 1759 } 1760 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1761 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1762 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1763 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1764 PetscFunctionReturn(0); 1765 } 1766 1767 #undef __FUNCT__ 1768 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1" 1769 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1770 { 1771 SETERRQ(1,"Not done yet"); 1772 } 1773 1774 #undef __FUNCT__ 1775 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_inplace" 1776 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_inplace(Mat A,Vec bb,Vec xx) 1777 { 1778 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1779 IS isrow=a->row; 1780 PetscErrorCode ierr; 1781 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1782 const MatScalar *aa=a->a,*v; 1783 PetscReal diagk; 1784 PetscScalar *x,*b,*t; 1785 PetscInt nz,k; 1786 1787 PetscFunctionBegin; 1788 /* solve D^(1/2)*U*perm(x) = b by back substitution */ 1789 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1790 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1791 t = a->solve_work; 1792 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1793 1794 for (k=mbs-1; k>=0; k--){ 1795 v = aa + ai[k] + 1; 1796 vj = aj + ai[k] + 1; 1797 diagk = PetscRealPart(aa[ai[k]]); 1798 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1799 t[k] = b[k] * sqrt(diagk); 1800 nz = ai[k+1] - ai[k] - 1; 1801 while (nz--) t[k] += (*v++) * t[*vj++]; 1802 x[rp[k]] = t[k]; 1803 } 1804 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1805 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1806 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1807 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1808 PetscFunctionReturn(0); 1809 } 1810 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "MatSolves_SeqSBAIJ_1_inplace" 1813 PetscErrorCode MatSolves_SeqSBAIJ_1_inplace(Mat A,Vecs bb,Vecs xx) 1814 { 1815 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1816 PetscErrorCode ierr; 1817 1818 PetscFunctionBegin; 1819 if (A->rmap->bs == 1) { 1820 ierr = MatSolve_SeqSBAIJ_1_inplace(A,bb->v,xx->v);CHKERRQ(ierr); 1821 } else { 1822 IS isrow=a->row; 1823 const PetscInt *vj,mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; 1824 const MatScalar *aa=a->a,*v; 1825 PetscScalar *x,*b,*t; 1826 PetscInt nz,k,n,i; 1827 if (bb->n > a->solves_work_n) { 1828 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 1829 ierr = PetscMalloc(bb->n*A->rmap->N*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1830 a->solves_work_n = bb->n; 1831 } 1832 n = bb->n; 1833 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1834 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1835 t = a->solves_work; 1836 1837 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1838 1839 /* solve U^T*D*y = perm(b) by forward substitution */ 1840 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs];} /* values are stored interlaced in t */ 1841 for (k=0; k<mbs; k++){ 1842 v = aa + ai[k]; 1843 vj = aj + ai[k]; 1844 nz = ai[k+1] - ai[k]; 1845 while (nz--) { 1846 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1847 v++;vj++; 1848 } 1849 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1850 } 1851 1852 /* solve U*perm(x) = y by back substitution */ 1853 for (k=mbs-1; k>=0; k--){ 1854 v = aa + ai[k]; 1855 vj = aj + ai[k]; 1856 nz = ai[k+1] - ai[k]; 1857 while (nz--) { 1858 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1859 v++;vj++; 1860 } 1861 for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i]; 1862 } 1863 1864 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1865 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1866 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1867 ierr = PetscLogFlops(bb->n*(4.0*a->nz + A->rmap->N));CHKERRQ(ierr); 1868 } 1869 PetscFunctionReturn(0); 1870 } 1871 1872 #undef __FUNCT__ 1873 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1874 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1875 { 1876 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1877 PetscErrorCode ierr; 1878 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*vj,*adiag = a->diag; 1879 const MatScalar *aa=a->a,*v; 1880 const PetscScalar *b; 1881 PetscScalar *x,xi; 1882 PetscInt nz,i,j; 1883 1884 PetscFunctionBegin; 1885 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1886 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1887 1888 /* solve U^T*D*y = b by forward substitution */ 1889 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1890 for (i=0; i<mbs; i++){ 1891 v = aa + ai[i]; 1892 vj = aj + ai[i]; 1893 xi = x[i]; 1894 nz = ai[i+1] - ai[i] - 1; /* exclude diag[i] */ 1895 for (j=0; j<nz; j++){ 1896 x[vj[j]] += v[j]* xi; 1897 } 1898 x[i] = xi*v[nz]; /* v[nz] = aa[diag[i]] = 1/D(i) */ 1899 } 1900 1901 /* solve U*x = y by backward substitution */ 1902 for (i=mbs-2; i>=0; i--){ 1903 xi = x[i]; 1904 v = aa + adiag[i] - 1; /* end of row i, excluding diag */ 1905 vj = aj + adiag[i] - 1; 1906 nz = ai[i+1] - ai[i] - 1; 1907 for (j=0; j<nz; j++) xi += v[-j]*x[vj[-j]]; 1908 x[i] = xi; 1909 } 1910 1911 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1912 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1913 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace" 1919 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1920 { 1921 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1922 PetscErrorCode ierr; 1923 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1924 MatScalar *aa=a->a,*v; 1925 PetscScalar *x,*b,xk; 1926 PetscInt nz,*vj,k; 1927 1928 PetscFunctionBegin; 1929 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1930 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1931 1932 /* solve U^T*D*y = b by forward substitution */ 1933 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1934 for (k=0; k<mbs; k++){ 1935 v = aa + ai[k] + 1; 1936 vj = aj + ai[k] + 1; 1937 xk = x[k]; 1938 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1939 while (nz--) x[*vj++] += (*v++) * xk; 1940 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 1941 } 1942 1943 /* solve U*x = y by back substitution */ 1944 for (k=mbs-2; k>=0; k--){ 1945 v = aa + ai[k] + 1; 1946 vj = aj + ai[k] + 1; 1947 xk = x[k]; 1948 nz = ai[k+1] - ai[k] - 1; 1949 while (nz--) { 1950 xk += (*v++) * x[*vj++]; 1951 } 1952 x[k] = xk; 1953 } 1954 1955 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1956 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1957 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering" 1963 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1964 { 1965 SETERRQ(1,"Not done yet"); 1966 } 1967 1968 #undef __FUNCT__ 1969 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace" 1970 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1971 { 1972 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1973 PetscErrorCode ierr; 1974 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 1975 MatScalar *aa=a->a,*v; 1976 PetscReal diagk; 1977 PetscScalar *x,*b; 1978 PetscInt nz,*vj,k; 1979 1980 PetscFunctionBegin; 1981 /* solve U^T*D^(1/2)*x = b by forward substitution */ 1982 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1983 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1984 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1985 for (k=0; k<mbs; k++){ 1986 v = aa + ai[k] + 1; 1987 vj = aj + ai[k] + 1; 1988 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 1989 while (nz--) x[*vj++] += (*v++) * x[k]; 1990 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1991 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ2(PETSC_ERR_SUP,"Diagonal (%g,%g) must be real and nonnegative",PetscRealPart(aa[ai[k]]),PetscImaginaryPart(aa[ai[k]])); 1992 x[k] *= sqrt(diagk); 1993 } 1994 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1995 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1996 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1997 PetscFunctionReturn(0); 1998 } 1999 2000 #undef __FUNCT__ 2001 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering" 2002 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 2003 { 2004 SETERRQ(1,"Not done yet"); 2005 } 2006 2007 #undef __FUNCT__ 2008 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace" 2009 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 2010 { 2011 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 2012 PetscErrorCode ierr; 2013 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 2014 MatScalar *aa=a->a,*v; 2015 PetscReal diagk; 2016 PetscScalar *x,*b; 2017 PetscInt nz,*vj,k; 2018 2019 PetscFunctionBegin; 2020 /* solve D^(1/2)*U*x = b by back substitution */ 2021 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2022 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2023 2024 for (k=mbs-1; k>=0; k--){ 2025 v = aa + ai[k] + 1; 2026 vj = aj + ai[k] + 1; 2027 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 2028 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 2029 x[k] = sqrt(diagk)*b[k]; 2030 nz = ai[k+1] - ai[k] - 1; 2031 while (nz--) x[k] += (*v++) * x[*vj++]; 2032 } 2033 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2034 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2035 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 2036 PetscFunctionReturn(0); 2037 } 2038 2039 extern PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat,PetscTruth); 2040 2041 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 2042 #undef __FUNCT__ 2043 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR" 2044 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat B,Mat A,IS perm,const MatFactorInfo *info) 2045 { 2046 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 2047 PetscErrorCode ierr; 2048 const PetscInt *rip,mbs = a->mbs,*ai = a->i,*aj = a->j; 2049 PetscInt *jutmp,bs = A->rmap->bs,bs2=a->bs2,i; 2050 PetscInt m,reallocs = 0,*levtmp; 2051 PetscInt *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 2052 PetscInt incrlev,*lev,shift,prow,nz; 2053 PetscReal f = info->fill,levels = info->levels; 2054 PetscTruth perm_identity; 2055 2056 PetscFunctionBegin; 2057 /* check whether perm is the identity mapping */ 2058 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2059 2060 if (perm_identity){ 2061 a->permute = PETSC_FALSE; 2062 ai = a->i; aj = a->j; 2063 } else { /* non-trivial permutation */ 2064 a->permute = PETSC_TRUE; 2065 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 2066 ai = a->inew; aj = a->jnew; 2067 } 2068 2069 /* initialization */ 2070 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2071 umax = (PetscInt)(f*ai[mbs] + 1); 2072 ierr = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr); 2073 umax += mbs + 1; 2074 shift = mbs + 1; 2075 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 2076 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 2077 iu[0] = mbs + 1; 2078 juidx = mbs + 1; 2079 /* prowl: linked list for pivot row */ 2080 ierr = PetscMalloc3(mbs,PetscInt,&prowl,mbs,PetscInt,&q,mbs,PetscInt,&levtmp);CHKERRQ(ierr); 2081 /* q: linked list for col index */ 2082 2083 for (i=0; i<mbs; i++){ 2084 prowl[i] = mbs; 2085 q[i] = 0; 2086 } 2087 2088 /* for each row k */ 2089 for (k=0; k<mbs; k++){ 2090 nzk = 0; 2091 q[k] = mbs; 2092 /* copy current row into linked list */ 2093 nz = ai[rip[k]+1] - ai[rip[k]]; 2094 j = ai[rip[k]]; 2095 while (nz--){ 2096 vj = rip[aj[j++]]; 2097 if (vj > k){ 2098 qm = k; 2099 do { 2100 m = qm; qm = q[m]; 2101 } while(qm < vj); 2102 if (qm == vj) { 2103 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 2104 } 2105 nzk++; 2106 q[m] = vj; 2107 q[vj] = qm; 2108 levtmp[vj] = 0; /* initialize lev for nonzero element */ 2109 } 2110 } 2111 2112 /* modify nonzero structure of k-th row by computing fill-in 2113 for each row prow to be merged in */ 2114 prow = k; 2115 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 2116 2117 while (prow < k){ 2118 /* merge row prow into k-th row */ 2119 jmin = iu[prow] + 1; 2120 jmax = iu[prow+1]; 2121 qm = k; 2122 for (j=jmin; j<jmax; j++){ 2123 incrlev = lev[j-shift] + 1; 2124 if (incrlev > levels) continue; 2125 vj = ju[j]; 2126 do { 2127 m = qm; qm = q[m]; 2128 } while (qm < vj); 2129 if (qm != vj){ /* a fill */ 2130 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 2131 levtmp[vj] = incrlev; 2132 } else { 2133 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 2134 } 2135 } 2136 prow = prowl[prow]; /* next pivot row */ 2137 } 2138 2139 /* add k to row list for first nonzero element in k-th row */ 2140 if (nzk > 1){ 2141 i = q[k]; /* col value of first nonzero element in k_th row of U */ 2142 prowl[k] = prowl[i]; prowl[i] = k; 2143 } 2144 iu[k+1] = iu[k] + nzk; 2145 2146 /* allocate more space to ju and lev if needed */ 2147 if (iu[k+1] > umax) { 2148 /* estimate how much additional space we will need */ 2149 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 2150 /* just double the memory each time */ 2151 maxadd = umax; 2152 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 2153 umax += maxadd; 2154 2155 /* allocate a longer ju */ 2156 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 2157 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 2158 ierr = PetscFree(ju);CHKERRQ(ierr); 2159 ju = jutmp; 2160 2161 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 2162 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr); 2163 ierr = PetscFree(lev);CHKERRQ(ierr); 2164 lev = jutmp; 2165 reallocs += 2; /* count how many times we realloc */ 2166 } 2167 2168 /* save nonzero structure of k-th row in ju */ 2169 i=k; 2170 while (nzk --) { 2171 i = q[i]; 2172 ju[juidx] = i; 2173 lev[juidx-shift] = levtmp[i]; 2174 juidx++; 2175 } 2176 } 2177 2178 #if defined(PETSC_USE_INFO) 2179 if (ai[mbs] != 0) { 2180 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2181 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2182 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2183 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 2184 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2185 } else { 2186 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2187 } 2188 #endif 2189 2190 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2191 ierr = PetscFree3(prowl,q,levtmp);CHKERRQ(ierr); 2192 ierr = PetscFree(lev);CHKERRQ(ierr); 2193 2194 /* put together the new matrix */ 2195 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,PETSC_NULL);CHKERRQ(ierr); 2196 2197 /* ierr = PetscLogObjectParent(B,iperm);CHKERRQ(ierr); */ 2198 b = (Mat_SeqSBAIJ*)(B)->data; 2199 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2200 b->singlemalloc = PETSC_FALSE; 2201 b->free_a = PETSC_TRUE; 2202 b->free_ij = PETSC_TRUE; 2203 /* the next line frees the default space generated by the Create() */ 2204 ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr); 2205 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 2206 b->j = ju; 2207 b->i = iu; 2208 b->diag = 0; 2209 b->ilen = 0; 2210 b->imax = 0; 2211 2212 if (b->row) { 2213 ierr = ISDestroy(b->row);CHKERRQ(ierr); 2214 } 2215 if (b->icol) { 2216 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 2217 } 2218 b->row = perm; 2219 b->icol = perm; 2220 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2221 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2222 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2223 /* In b structure: Free imax, ilen, old a, old j. 2224 Allocate idnew, solve_work, new a, new j */ 2225 ierr = PetscLogObjectMemory(B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2226 b->maxnz = b->nz = iu[mbs]; 2227 2228 (B)->info.factor_mallocs = reallocs; 2229 (B)->info.fill_ratio_given = f; 2230 if (ai[mbs] != 0) { 2231 (B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2232 } else { 2233 (B)->info.fill_ratio_needed = 0.0; 2234 } 2235 ierr = MatSeqSBAIJSetNumericFactorization(B,perm_identity);CHKERRQ(ierr); 2236 PetscFunctionReturn(0); 2237 } 2238 2239 #include "petscbt.h" 2240 #include "../src/mat/utils/freespace.h" 2241 #undef __FUNCT__ 2242 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 2243 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2244 { 2245 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2246 Mat_SeqSBAIJ *b; 2247 PetscErrorCode ierr; 2248 PetscTruth perm_identity,free_ij = PETSC_TRUE,missing; 2249 PetscInt bs=A->rmap->bs,am=a->mbs,d; 2250 const PetscInt *cols,*rip,*ai,*aj; 2251 PetscInt reallocs=0,i,*ui; 2252 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2253 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 2254 PetscReal fill=info->fill,levels=info->levels,ratio_needed; 2255 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2256 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2257 PetscBT lnkbt; 2258 2259 PetscFunctionBegin; 2260 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2261 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2262 2263 /* 2264 This code originally uses Modified Sparse Row (MSR) storage 2265 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice! 2266 Then it is rewritten so the factor B takes seqsbaij format. However the associated 2267 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1, 2268 thus the original code in MSR format is still used for these cases. 2269 The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever 2270 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 2271 */ 2272 if (bs > 1){ 2273 ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info);CHKERRQ(ierr); 2274 PetscFunctionReturn(0); 2275 } 2276 2277 /* check whether perm is the identity mapping */ 2278 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2279 2280 /* special case that simply copies fill pattern */ 2281 if (!levels && perm_identity) { 2282 a->permute = PETSC_FALSE; 2283 /* reuse the column pointers and row offsets for memory savings */ 2284 ui = a->i; 2285 uj = a->j; 2286 free_ij = PETSC_FALSE; 2287 ratio_needed = 1.0; 2288 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2289 if (perm_identity){ 2290 a->permute = PETSC_FALSE; 2291 ai = a->i; aj = a->j; 2292 } else { /* non-trivial permutation */ 2293 a->permute = PETSC_TRUE; 2294 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 2295 ai = a->inew; aj = a->jnew; 2296 } 2297 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2298 2299 /* initialization */ 2300 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2301 ui[0] = 0; 2302 2303 /* jl: linked list for storing indices of the pivot rows 2304 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2305 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&il,am,PetscInt,&jl);CHKERRQ(ierr); 2306 for (i=0; i<am; i++){ 2307 jl[i] = am; il[i] = 0; 2308 } 2309 2310 /* create and initialize a linked list for storing column indices of the active row k */ 2311 nlnk = am + 1; 2312 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2313 2314 /* initial FreeSpace size is fill*(ai[am]+1) */ 2315 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2316 current_space = free_space; 2317 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2318 current_space_lvl = free_space_lvl; 2319 2320 for (k=0; k<am; k++){ /* for each active row k */ 2321 /* initialize lnk by the column indices of row rip[k] */ 2322 nzk = 0; 2323 ncols = ai[rip[k]+1] - ai[rip[k]]; 2324 cols = aj+ai[rip[k]]; 2325 ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2326 nzk += nlnk; 2327 2328 /* update lnk by computing fill-in for each pivot row to be merged in */ 2329 prow = jl[k]; /* 1st pivot row */ 2330 2331 while (prow < k){ 2332 nextprow = jl[prow]; 2333 2334 /* merge prow into k-th row */ 2335 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2336 jmax = ui[prow+1]; 2337 ncols = jmax-jmin; 2338 i = jmin - ui[prow]; 2339 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2340 j = *(uj_lvl_ptr[prow] + i - 1); 2341 cols_lvl = uj_lvl_ptr[prow]+i; 2342 ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2343 nzk += nlnk; 2344 2345 /* update il and jl for prow */ 2346 if (jmin < jmax){ 2347 il[prow] = jmin; 2348 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2349 } 2350 prow = nextprow; 2351 } 2352 2353 /* if free space is not available, make more free space */ 2354 if (current_space->local_remaining<nzk) { 2355 i = am - k + 1; /* num of unfactored rows */ 2356 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2357 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2358 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2359 reallocs++; 2360 } 2361 2362 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2363 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2364 2365 /* add the k-th row into il and jl */ 2366 if (nzk-1 > 0){ 2367 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2368 jl[k] = jl[i]; jl[i] = k; 2369 il[k] = ui[k] + 1; 2370 } 2371 uj_ptr[k] = current_space->array; 2372 uj_lvl_ptr[k] = current_space_lvl->array; 2373 2374 current_space->array += nzk; 2375 current_space->local_used += nzk; 2376 current_space->local_remaining -= nzk; 2377 current_space_lvl->array += nzk; 2378 current_space_lvl->local_used += nzk; 2379 current_space_lvl->local_remaining -= nzk; 2380 2381 ui[k+1] = ui[k] + nzk; 2382 } 2383 2384 #if defined(PETSC_USE_INFO) 2385 if (ai[am] != 0) { 2386 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2387 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2388 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2389 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2390 } else { 2391 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2392 } 2393 #endif 2394 2395 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2396 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,il,jl);CHKERRQ(ierr); 2397 2398 /* destroy list of free space and other temporary array(s) */ 2399 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2400 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2401 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2402 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2403 if (ai[am] != 0) { 2404 ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2405 } else { 2406 ratio_needed = 0.0; 2407 } 2408 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2409 2410 /* put together the new matrix in MATSEQSBAIJ format */ 2411 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2412 2413 b = (Mat_SeqSBAIJ*)(fact)->data; 2414 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2415 b->singlemalloc = PETSC_FALSE; 2416 b->free_a = PETSC_TRUE; 2417 b->free_ij = free_ij; 2418 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2419 b->j = uj; 2420 b->i = ui; 2421 b->diag = 0; 2422 b->ilen = 0; 2423 b->imax = 0; 2424 b->row = perm; 2425 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2426 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2427 b->icol = perm; 2428 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2429 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2430 b->maxnz = b->nz = ui[am]; 2431 2432 (fact)->info.factor_mallocs = reallocs; 2433 (fact)->info.fill_ratio_given = fill; 2434 (fact)->info.fill_ratio_needed = ratio_needed; 2435 ierr = MatSeqSBAIJSetNumericFactorization(fact,perm_identity);CHKERRQ(ierr); 2436 PetscFunctionReturn(0); 2437 } 2438 2439