1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for SBAIJ format. 5 */ 6 7 #include "../src/mat/impls/sbaij/seq/sbaij.h" 8 #include "../src/mat/impls/baij/seq/baij.h" 9 #include "../src/mat/blockinvert.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_inplace" 13 PetscErrorCode MatSolve_SeqSBAIJ_N_inplace(Mat A,Vec bb,Vec xx) 14 { 15 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 16 IS isrow=a->row; 17 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 18 PetscErrorCode ierr; 19 const PetscInt *r; 20 PetscInt nz,*vj,k,idx,k1; 21 PetscInt bs=A->rmap->bs,bs2 = a->bs2; 22 MatScalar *aa=a->a,*v,*diag; 23 PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t; 24 25 PetscFunctionBegin; 26 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 27 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 28 t = a->solve_work; 29 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 30 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 31 32 /* solve U^T * D * y = b by forward substitution */ 33 xk = t; 34 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 35 idx = bs*r[k]; 36 for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1]; 37 } 38 for (k=0; k<mbs; k++){ 39 v = aa + bs2*ai[k]; 40 xk = t + k*bs; /* Dk*xk = k-th block of x */ 41 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 42 nz = ai[k+1] - ai[k]; 43 vj = aj + ai[k]; 44 xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */ 45 while (nz--) { 46 /* x(:) += U(k,:)^T*(Dk*xk) */ 47 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 48 vj++; xj = t + (*vj)*bs; 49 v += bs2; 50 } 51 /* xk = inv(Dk)*(Dk*xk) */ 52 diag = aa+k*bs2; /* ptr to inv(Dk) */ 53 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 54 } 55 56 /* solve U*x = y by back substitution */ 57 for (k=mbs-1; k>=0; k--){ 58 v = aa + bs2*ai[k]; 59 xk = t + k*bs; /* xk */ 60 nz = ai[k+1] - ai[k]; 61 vj = aj + ai[k]; 62 xj = t + (*vj)*bs; 63 while (nz--) { 64 /* xk += U(k,:)*x(:) */ 65 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 66 vj++; 67 v += bs2; xj = t + (*vj)*bs; 68 } 69 idx = bs*r[k]; 70 for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++; 71 } 72 73 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 74 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 75 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 76 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 77 ierr = PetscLogFlops(4.0*bs2*a->nz -(bs+2.0*bs2)*mbs);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_inplace" 83 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_inplace(Mat A,Vec bb,Vec xx) 84 { 85 PetscFunctionBegin; 86 SETERRQ(1,"not implemented yet"); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_inplace" 92 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_inplace(Mat A,Vec bb,Vec xx) 93 { 94 PetscFunctionBegin; 95 SETERRQ(1,"not implemented yet"); 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 101 PetscErrorCode ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 102 { 103 PetscErrorCode ierr; 104 PetscInt nz,*vj,k; 105 PetscInt bs2 = bs*bs; 106 MatScalar *v,*diag; 107 PetscScalar *xk,*xj,*xk_tmp; 108 109 PetscFunctionBegin; 110 ierr = PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);CHKERRQ(ierr); 111 for (k=0; k<mbs; k++){ 112 v = aa + bs2*ai[k]; 113 xk = x + k*bs; /* Dk*xk = k-th block of x */ 114 ierr = PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* xk_tmp <- xk */ 115 nz = ai[k+1] - ai[k]; 116 vj = aj + ai[k]; 117 xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */ 118 while (nz--) { 119 /* x(:) += U(k,:)^T*(Dk*xk) */ 120 Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */ 121 vj++; xj = x + (*vj)*bs; 122 v += bs2; 123 } 124 /* xk = inv(Dk)*(Dk*xk) */ 125 diag = aa+k*bs2; /* ptr to inv(Dk) */ 126 Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */ 127 } 128 ierr = PetscFree(xk_tmp);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private" 134 PetscErrorCode BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x) 135 { 136 PetscInt nz,*vj,k; 137 PetscInt bs2 = bs*bs; 138 MatScalar *v; 139 PetscScalar *xk,*xj; 140 141 PetscFunctionBegin; 142 for (k=mbs-1; k>=0; k--){ 143 v = aa + bs2*ai[k]; 144 xk = x + k*bs; /* xk */ 145 nz = ai[k+1] - ai[k]; 146 vj = aj + ai[k]; 147 xj = x + (*vj)*bs; 148 while (nz--) { 149 /* xk += U(k,:)*x(:) */ 150 Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */ 151 vj++; 152 v += bs2; xj = x + (*vj)*bs; 153 } 154 } 155 PetscFunctionReturn(0); 156 } 157 158 #undef __FUNCT__ 159 #define __FUNCT__ "MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace" 160 PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 161 { 162 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 163 PetscErrorCode ierr; 164 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; 165 PetscInt bs=A->rmap->bs; 166 MatScalar *aa=a->a; 167 PetscScalar *x,*b; 168 169 PetscFunctionBegin; 170 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 171 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 172 173 /* solve U^T * D * y = b by forward substitution */ 174 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 175 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 176 177 /* solve U*x = y by back substitution */ 178 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 179 180 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 181 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 182 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 183 PetscFunctionReturn(0); 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace" 188 PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 189 { 190 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 191 PetscErrorCode ierr; 192 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; 193 PetscInt bs=A->rmap->bs; 194 MatScalar *aa=a->a; 195 PetscScalar *x,*b; 196 197 PetscFunctionBegin; 198 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 199 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 200 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 201 ierr = ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 202 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 203 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 204 ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); 205 PetscFunctionReturn(0); 206 } 207 208 #undef __FUNCT__ 209 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace" 210 PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 211 { 212 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 213 PetscErrorCode ierr; 214 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; 215 PetscInt bs=A->rmap->bs; 216 MatScalar *aa=a->a; 217 PetscScalar *x,*b; 218 219 PetscFunctionBegin; 220 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 221 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 222 ierr = PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 223 ierr = BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);CHKERRQ(ierr); 224 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 225 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 226 ierr = PetscLogFlops(2.0*bs2*(a->nz-mbs));CHKERRQ(ierr); 227 PetscFunctionReturn(0); 228 } 229 230 #undef __FUNCT__ 231 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_inplace" 232 PetscErrorCode MatSolve_SeqSBAIJ_7_inplace(Mat A,Vec bb,Vec xx) 233 { 234 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 235 IS isrow=a->row; 236 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2,bs=A->rmap->bs; 237 PetscErrorCode ierr; 238 const PetscInt *r; 239 PetscInt nz,*vj,k,idx; 240 MatScalar *aa=a->a,*v,*d; 241 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp; 242 243 PetscFunctionBegin; 244 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 245 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 246 t = a->solve_work; 247 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 248 249 /* solve U^T * D * y = b by forward substitution */ 250 tp = t; 251 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 252 idx = 7*r[k]; 253 tp[0] = b[idx]; 254 tp[1] = b[idx+1]; 255 tp[2] = b[idx+2]; 256 tp[3] = b[idx+3]; 257 tp[4] = b[idx+4]; 258 tp[5] = b[idx+5]; 259 tp[6] = b[idx+6]; 260 tp += 7; 261 } 262 263 for (k=0; k<mbs; k++){ 264 v = aa + 49*ai[k]; 265 vj = aj + ai[k]; 266 tp = t + k*7; 267 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; 268 nz = ai[k+1] - ai[k]; 269 tp = t + (*vj)*7; 270 while (nz--) { 271 tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 272 tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 273 tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 274 tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 275 tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 276 tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 277 tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 278 vj++; tp = t + (*vj)*7; 279 v += 49; 280 } 281 282 /* xk = inv(Dk)*(Dk*xk) */ 283 d = aa+k*49; /* ptr to inv(Dk) */ 284 tp = t + k*7; 285 tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 286 tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 287 tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 288 tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 289 tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 290 tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 291 tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 292 } 293 294 /* solve U*x = y by back substitution */ 295 for (k=mbs-1; k>=0; k--){ 296 v = aa + 49*ai[k]; 297 vj = aj + ai[k]; 298 tp = t + k*7; 299 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */ 300 nz = ai[k+1] - ai[k]; 301 302 tp = t + (*vj)*7; 303 while (nz--) { 304 /* xk += U(k,:)*x(:) */ 305 x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6]; 306 x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6]; 307 x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6]; 308 x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6]; 309 x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6]; 310 x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6]; 311 x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6]; 312 vj++; tp = t + (*vj)*7; 313 v += 49; 314 } 315 tp = t + k*7; 316 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6; 317 idx = 7*r[k]; 318 x[idx] = x0; 319 x[idx+1] = x1; 320 x[idx+2] = x2; 321 x[idx+3] = x3; 322 x[idx+4] = x4; 323 x[idx+5] = x5; 324 x[idx+6] = x6; 325 } 326 327 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 328 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 329 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 330 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 331 PetscFunctionReturn(0); 332 } 333 334 #undef __FUNCT__ 335 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 336 PetscErrorCode ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 337 { 338 MatScalar *v,*d; 339 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 340 PetscInt nz,*vj,k; 341 342 PetscFunctionBegin; 343 for (k=0; k<mbs; k++){ 344 v = aa + 49*ai[k]; 345 xp = x + k*7; 346 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */ 347 nz = ai[k+1] - ai[k]; 348 vj = aj + ai[k]; 349 xp = x + (*vj)*7; 350 while (nz--) { 351 /* x(:) += U(k,:)^T*(Dk*xk) */ 352 xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6; 353 xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6; 354 xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6; 355 xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6; 356 xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6; 357 xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6; 358 xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6; 359 vj++; xp = x + (*vj)*7; 360 v += 49; 361 } 362 /* xk = inv(Dk)*(Dk*xk) */ 363 d = aa+k*49; /* ptr to inv(Dk) */ 364 xp = x + k*7; 365 xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6; 366 xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6; 367 xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6; 368 xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6; 369 xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6; 370 xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6; 371 xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6; 372 } 373 PetscFunctionReturn(0); 374 } 375 376 #undef __FUNCT__ 377 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private" 378 PetscErrorCode BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 379 { 380 MatScalar *v; 381 PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6; 382 PetscInt nz,*vj,k; 383 384 PetscFunctionBegin; 385 for (k=mbs-1; k>=0; k--){ 386 v = aa + 49*ai[k]; 387 xp = x + k*7; 388 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */ 389 nz = ai[k+1] - ai[k]; 390 vj = aj + ai[k]; 391 xp = x + (*vj)*7; 392 while (nz--) { 393 /* xk += U(k,:)*x(:) */ 394 x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6]; 395 x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6]; 396 x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6]; 397 x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6]; 398 x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6]; 399 x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6]; 400 x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6]; 401 vj++; 402 v += 49; xp = x + (*vj)*7; 403 } 404 xp = x + k*7; 405 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6; 406 } 407 PetscFunctionReturn(0); 408 } 409 410 #undef __FUNCT__ 411 #define __FUNCT__ "MatSolve_SeqSBAIJ_7_NaturalOrdering_inplace" 412 PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 413 { 414 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 415 PetscErrorCode ierr; 416 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 417 MatScalar *aa=a->a; 418 PetscScalar *x,*b; 419 420 PetscFunctionBegin; 421 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 422 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 423 424 /* solve U^T * D * y = b by forward substitution */ 425 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 426 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 427 428 /* solve U*x = y by back substitution */ 429 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 430 431 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 432 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 433 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 434 PetscFunctionReturn(0); 435 } 436 437 #undef __FUNCT__ 438 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace" 439 PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 440 { 441 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 442 PetscErrorCode ierr; 443 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 444 MatScalar *aa=a->a; 445 PetscScalar *x,*b; 446 447 PetscFunctionBegin; 448 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 449 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 450 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 451 ierr = ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 452 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 453 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 454 ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); 455 PetscFunctionReturn(0); 456 } 457 458 #undef __FUNCT__ 459 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace" 460 PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 461 { 462 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 463 PetscErrorCode ierr; 464 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; 465 MatScalar *aa=a->a; 466 PetscScalar *x,*b; 467 468 PetscFunctionBegin; 469 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 470 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 471 ierr = PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 472 ierr = BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 473 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 474 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 475 ierr = PetscLogFlops(2.0*bs2*(a->nz-mbs));CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_inplace" 481 PetscErrorCode MatSolve_SeqSBAIJ_6_inplace(Mat A,Vec bb,Vec xx) 482 { 483 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 484 IS isrow=a->row; 485 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 486 PetscErrorCode ierr; 487 const PetscInt *r; 488 PetscInt nz,*vj,k,idx; 489 MatScalar *aa=a->a,*v,*d; 490 PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp; 491 492 PetscFunctionBegin; 493 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 494 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 495 t = a->solve_work; 496 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 497 498 /* solve U^T * D * y = b by forward substitution */ 499 tp = t; 500 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 501 idx = 6*r[k]; 502 tp[0] = b[idx]; 503 tp[1] = b[idx+1]; 504 tp[2] = b[idx+2]; 505 tp[3] = b[idx+3]; 506 tp[4] = b[idx+4]; 507 tp[5] = b[idx+5]; 508 tp += 6; 509 } 510 511 for (k=0; k<mbs; k++){ 512 v = aa + 36*ai[k]; 513 vj = aj + ai[k]; 514 tp = t + k*6; 515 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; 516 nz = ai[k+1] - ai[k]; 517 tp = t + (*vj)*6; 518 while (nz--) { 519 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 520 tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 521 tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 522 tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 523 tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 524 tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 525 vj++; tp = t + (*vj)*6; 526 v += 36; 527 } 528 529 /* xk = inv(Dk)*(Dk*xk) */ 530 d = aa+k*36; /* ptr to inv(Dk) */ 531 tp = t + k*6; 532 tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 533 tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 534 tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 535 tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 536 tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 537 tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 538 } 539 540 /* solve U*x = y by back substitution */ 541 for (k=mbs-1; k>=0; k--){ 542 v = aa + 36*ai[k]; 543 vj = aj + ai[k]; 544 tp = t + k*6; 545 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */ 546 nz = ai[k+1] - ai[k]; 547 548 tp = t + (*vj)*6; 549 while (nz--) { 550 /* xk += U(k,:)*x(:) */ 551 x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5]; 552 x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5]; 553 x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5]; 554 x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5]; 555 x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5]; 556 x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5]; 557 vj++; tp = t + (*vj)*6; 558 v += 36; 559 } 560 tp = t + k*6; 561 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; 562 idx = 6*r[k]; 563 x[idx] = x0; 564 x[idx+1] = x1; 565 x[idx+2] = x2; 566 x[idx+3] = x3; 567 x[idx+4] = x4; 568 x[idx+5] = x5; 569 } 570 571 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 572 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 573 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 574 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 580 PetscErrorCode ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 581 { 582 MatScalar *v,*d; 583 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 584 PetscInt nz,*vj,k; 585 586 PetscFunctionBegin; 587 for (k=0; k<mbs; k++){ 588 v = aa + 36*ai[k]; 589 xp = x + k*6; 590 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */ 591 nz = ai[k+1] - ai[k]; 592 vj = aj + ai[k]; 593 xp = x + (*vj)*6; 594 while (nz--) { 595 /* x(:) += U(k,:)^T*(Dk*xk) */ 596 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5; 597 xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5; 598 xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5; 599 xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5; 600 xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5; 601 xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5; 602 vj++; xp = x + (*vj)*6; 603 v += 36; 604 } 605 /* xk = inv(Dk)*(Dk*xk) */ 606 d = aa+k*36; /* ptr to inv(Dk) */ 607 xp = x + k*6; 608 xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5; 609 xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5; 610 xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5; 611 xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5; 612 xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5; 613 xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5; 614 } 615 PetscFunctionReturn(0); 616 } 617 #undef __FUNCT__ 618 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private" 619 PetscErrorCode BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 620 { 621 MatScalar *v; 622 PetscScalar *xp,x0,x1,x2,x3,x4,x5; 623 PetscInt nz,*vj,k; 624 625 PetscFunctionBegin; 626 for (k=mbs-1; k>=0; k--){ 627 v = aa + 36*ai[k]; 628 xp = x + k*6; 629 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */ 630 nz = ai[k+1] - ai[k]; 631 vj = aj + ai[k]; 632 xp = x + (*vj)*6; 633 while (nz--) { 634 /* xk += U(k,:)*x(:) */ 635 x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5]; 636 x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5]; 637 x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5]; 638 x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5]; 639 x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5]; 640 x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5]; 641 vj++; 642 v += 36; xp = x + (*vj)*6; 643 } 644 xp = x + k*6; 645 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; 646 } 647 PetscFunctionReturn(0); 648 } 649 650 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatSolve_SeqSBAIJ_6_NaturalOrdering_inplace" 653 PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 654 { 655 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 656 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 657 MatScalar *aa=a->a; 658 PetscScalar *x,*b; 659 PetscErrorCode ierr; 660 661 PetscFunctionBegin; 662 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 663 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 664 665 /* solve U^T * D * y = b by forward substitution */ 666 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 667 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 668 669 /* solve U*x = y by back substitution */ 670 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 671 672 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 673 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 674 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 675 PetscFunctionReturn(0); 676 } 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace" 680 PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 681 { 682 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 683 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 684 MatScalar *aa=a->a; 685 PetscScalar *x,*b; 686 PetscErrorCode ierr; 687 688 PetscFunctionBegin; 689 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 690 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 691 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 692 ierr = ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 693 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 694 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 695 ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace" 701 PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 702 { 703 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 704 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; 705 MatScalar *aa=a->a; 706 PetscScalar *x,*b; 707 PetscErrorCode ierr; 708 709 PetscFunctionBegin; 710 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 711 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 712 ierr = PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 713 ierr = BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 714 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 715 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 716 ierr = PetscLogFlops(2.0*bs2*(a->nz - mbs));CHKERRQ(ierr); 717 PetscFunctionReturn(0); 718 } 719 720 #undef __FUNCT__ 721 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_inplace" 722 PetscErrorCode MatSolve_SeqSBAIJ_5_inplace(Mat A,Vec bb,Vec xx) 723 { 724 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 725 IS isrow=a->row; 726 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2 = a->bs2; 727 PetscErrorCode ierr; 728 const PetscInt *r; 729 PetscInt nz,*vj,k,idx; 730 MatScalar *aa=a->a,*v,*diag; 731 PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp; 732 733 PetscFunctionBegin; 734 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 735 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 736 t = a->solve_work; 737 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 738 739 /* solve U^T * D * y = b by forward substitution */ 740 tp = t; 741 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 742 idx = 5*r[k]; 743 tp[0] = b[idx]; 744 tp[1] = b[idx+1]; 745 tp[2] = b[idx+2]; 746 tp[3] = b[idx+3]; 747 tp[4] = b[idx+4]; 748 tp += 5; 749 } 750 751 for (k=0; k<mbs; k++){ 752 v = aa + 25*ai[k]; 753 vj = aj + ai[k]; 754 tp = t + k*5; 755 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; 756 nz = ai[k+1] - ai[k]; 757 758 tp = t + (*vj)*5; 759 while (nz--) { 760 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 761 tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 762 tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4; 763 tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4; 764 tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4; 765 vj++; tp = t + (*vj)*5; 766 v += 25; 767 } 768 769 /* xk = inv(Dk)*(Dk*xk) */ 770 diag = aa+k*25; /* ptr to inv(Dk) */ 771 tp = t + k*5; 772 tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 773 tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 774 tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 775 tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 776 tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 777 } 778 779 /* solve U*x = y by back substitution */ 780 for (k=mbs-1; k>=0; k--){ 781 v = aa + 25*ai[k]; 782 vj = aj + ai[k]; 783 tp = t + k*5; 784 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */ 785 nz = ai[k+1] - ai[k]; 786 787 tp = t + (*vj)*5; 788 while (nz--) { 789 /* xk += U(k,:)*x(:) */ 790 x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4]; 791 x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4]; 792 x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4]; 793 x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4]; 794 x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4]; 795 vj++; tp = t + (*vj)*5; 796 v += 25; 797 } 798 tp = t + k*5; 799 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; 800 idx = 5*r[k]; 801 x[idx] = x0; 802 x[idx+1] = x1; 803 x[idx+2] = x2; 804 x[idx+3] = x3; 805 x[idx+4] = x4; 806 } 807 808 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 809 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 810 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 811 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 812 PetscFunctionReturn(0); 813 } 814 815 #undef __FUNCT__ 816 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 817 PetscErrorCode ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 818 { 819 MatScalar *v,*diag; 820 PetscScalar *xp,x0,x1,x2,x3,x4; 821 PetscInt nz,*vj,k; 822 823 PetscFunctionBegin; 824 for (k=0; k<mbs; k++){ 825 v = aa + 25*ai[k]; 826 xp = x + k*5; 827 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */ 828 nz = ai[k+1] - ai[k]; 829 vj = aj + ai[k]; 830 xp = x + (*vj)*5; 831 while (nz--) { 832 /* x(:) += U(k,:)^T*(Dk*xk) */ 833 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4; 834 xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4; 835 xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4; 836 xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4; 837 xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4; 838 vj++; xp = x + (*vj)*5; 839 v += 25; 840 } 841 /* xk = inv(Dk)*(Dk*xk) */ 842 diag = aa+k*25; /* ptr to inv(Dk) */ 843 xp = x + k*5; 844 xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 845 xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 846 xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 847 xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 848 xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 849 } 850 PetscFunctionReturn(0); 851 } 852 853 #undef __FUNCT__ 854 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private" 855 PetscErrorCode BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 856 { 857 MatScalar *v; 858 PetscScalar *xp,x0,x1,x2,x3,x4; 859 PetscInt nz,*vj,k; 860 861 PetscFunctionBegin; 862 for (k=mbs-1; k>=0; k--){ 863 v = aa + 25*ai[k]; 864 xp = x + k*5; 865 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */ 866 nz = ai[k+1] - ai[k]; 867 vj = aj + ai[k]; 868 xp = x + (*vj)*5; 869 while (nz--) { 870 /* xk += U(k,:)*x(:) */ 871 x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4]; 872 x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4]; 873 x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4]; 874 x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4]; 875 x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4]; 876 vj++; 877 v += 25; xp = x + (*vj)*5; 878 } 879 xp = x + k*5; 880 xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; 881 } 882 PetscFunctionReturn(0); 883 } 884 885 #undef __FUNCT__ 886 #define __FUNCT__ "MatSolve_SeqSBAIJ_5_NaturalOrdering_inplace" 887 PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 888 { 889 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 890 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2 = a->bs2; 891 MatScalar *aa=a->a; 892 PetscScalar *x,*b; 893 PetscErrorCode ierr; 894 895 PetscFunctionBegin; 896 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 897 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 898 899 /* solve U^T * D * y = b by forward substitution */ 900 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 901 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 902 903 /* solve U*x = y by back substitution */ 904 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 905 906 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 907 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 908 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 909 PetscFunctionReturn(0); 910 } 911 912 #undef __FUNCT__ 913 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace" 914 PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 915 { 916 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 917 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 918 MatScalar *aa=a->a; 919 PetscScalar *x,*b; 920 PetscErrorCode ierr; 921 922 PetscFunctionBegin; 923 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 924 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 925 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 926 ierr = ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 927 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 928 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 929 ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); 930 PetscFunctionReturn(0); 931 } 932 933 #undef __FUNCT__ 934 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace" 935 PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 936 { 937 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 938 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; 939 MatScalar *aa=a->a; 940 PetscScalar *x,*b; 941 PetscErrorCode ierr; 942 943 PetscFunctionBegin; 944 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 945 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 946 ierr = PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 947 ierr = BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 948 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 949 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 950 ierr = PetscLogFlops(2.0*bs2*(a->nz-mbs));CHKERRQ(ierr); 951 PetscFunctionReturn(0); 952 } 953 954 #undef __FUNCT__ 955 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_inplace" 956 PetscErrorCode MatSolve_SeqSBAIJ_4_inplace(Mat A,Vec bb,Vec xx) 957 { 958 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 959 IS isrow=a->row; 960 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 961 PetscErrorCode ierr; 962 const PetscInt *r; 963 PetscInt nz,*vj,k,idx; 964 MatScalar *aa=a->a,*v,*diag; 965 PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp; 966 967 PetscFunctionBegin; 968 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 969 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 970 t = a->solve_work; 971 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 972 973 /* solve U^T * D * y = b by forward substitution */ 974 tp = t; 975 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 976 idx = 4*r[k]; 977 tp[0] = b[idx]; 978 tp[1] = b[idx+1]; 979 tp[2] = b[idx+2]; 980 tp[3] = b[idx+3]; 981 tp += 4; 982 } 983 984 for (k=0; k<mbs; k++){ 985 v = aa + 16*ai[k]; 986 vj = aj + ai[k]; 987 tp = t + k*4; 988 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; 989 nz = ai[k+1] - ai[k]; 990 991 tp = t + (*vj)*4; 992 while (nz--) { 993 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 994 tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 995 tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 996 tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 997 vj++; tp = t + (*vj)*4; 998 v += 16; 999 } 1000 1001 /* xk = inv(Dk)*(Dk*xk) */ 1002 diag = aa+k*16; /* ptr to inv(Dk) */ 1003 tp = t + k*4; 1004 tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1005 tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1006 tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1007 tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1008 } 1009 1010 /* solve U*x = y by back substitution */ 1011 for (k=mbs-1; k>=0; k--){ 1012 v = aa + 16*ai[k]; 1013 vj = aj + ai[k]; 1014 tp = t + k*4; 1015 x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */ 1016 nz = ai[k+1] - ai[k]; 1017 1018 tp = t + (*vj)*4; 1019 while (nz--) { 1020 /* xk += U(k,:)*x(:) */ 1021 x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3]; 1022 x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3]; 1023 x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3]; 1024 x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3]; 1025 vj++; tp = t + (*vj)*4; 1026 v += 16; 1027 } 1028 tp = t + k*4; 1029 tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; 1030 idx = 4*r[k]; 1031 x[idx] = x0; 1032 x[idx+1] = x1; 1033 x[idx+2] = x2; 1034 x[idx+3] = x3; 1035 } 1036 1037 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1038 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1039 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1040 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1046 PetscErrorCode ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1047 { 1048 MatScalar *v,*diag; 1049 PetscScalar *xp,x0,x1,x2,x3; 1050 PetscInt nz,*vj,k; 1051 1052 PetscFunctionBegin; 1053 for (k=0; k<mbs; k++){ 1054 v = aa + 16*ai[k]; 1055 xp = x + k*4; 1056 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */ 1057 nz = ai[k+1] - ai[k]; 1058 vj = aj + ai[k]; 1059 xp = x + (*vj)*4; 1060 while (nz--) { 1061 /* x(:) += U(k,:)^T*(Dk*xk) */ 1062 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3; 1063 xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3; 1064 xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3; 1065 xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3; 1066 vj++; xp = x + (*vj)*4; 1067 v += 16; 1068 } 1069 /* xk = inv(Dk)*(Dk*xk) */ 1070 diag = aa+k*16; /* ptr to inv(Dk) */ 1071 xp = x + k*4; 1072 xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 1073 xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 1074 xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3; 1075 xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3; 1076 } 1077 PetscFunctionReturn(0); 1078 } 1079 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private" 1082 PetscErrorCode BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1083 { 1084 MatScalar *v; 1085 PetscScalar *xp,x0,x1,x2,x3; 1086 PetscInt nz,*vj,k; 1087 1088 PetscFunctionBegin; 1089 for (k=mbs-1; k>=0; k--){ 1090 v = aa + 16*ai[k]; 1091 xp = x + k*4; 1092 x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */ 1093 nz = ai[k+1] - ai[k]; 1094 vj = aj + ai[k]; 1095 xp = x + (*vj)*4; 1096 while (nz--) { 1097 /* xk += U(k,:)*x(:) */ 1098 x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3]; 1099 x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3]; 1100 x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3]; 1101 x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3]; 1102 vj++; 1103 v += 16; xp = x + (*vj)*4; 1104 } 1105 xp = x + k*4; 1106 xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3; 1107 } 1108 PetscFunctionReturn(0); 1109 } 1110 1111 #undef __FUNCT__ 1112 #define __FUNCT__ "MatSolve_SeqSBAIJ_4_NaturalOrdering_inplace" 1113 PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1114 { 1115 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1116 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 1117 MatScalar *aa=a->a; 1118 PetscScalar *x,*b; 1119 PetscErrorCode ierr; 1120 1121 PetscFunctionBegin; 1122 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1123 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1124 1125 /* solve U^T * D * y = b by forward substitution */ 1126 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1127 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1128 1129 /* solve U*x = y by back substitution */ 1130 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1131 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1132 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1133 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNCT__ 1138 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace" 1139 PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1140 { 1141 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1142 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 1143 MatScalar *aa=a->a; 1144 PetscScalar *x,*b; 1145 PetscErrorCode ierr; 1146 1147 PetscFunctionBegin; 1148 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1149 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1150 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x <- b */ 1151 ierr = ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1152 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1153 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1154 ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); 1155 PetscFunctionReturn(0); 1156 } 1157 1158 #undef __FUNCT__ 1159 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace" 1160 PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1161 { 1162 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1163 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; 1164 MatScalar *aa=a->a; 1165 PetscScalar *x,*b; 1166 PetscErrorCode ierr; 1167 1168 PetscFunctionBegin; 1169 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1170 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1171 ierr = PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1172 ierr = BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1173 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1174 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1175 ierr = PetscLogFlops(2.0*bs2*(a->nz-mbs));CHKERRQ(ierr); 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_inplace" 1181 PetscErrorCode MatSolve_SeqSBAIJ_3_inplace(Mat A,Vec bb,Vec xx) 1182 { 1183 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1184 IS isrow=a->row; 1185 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 1186 PetscErrorCode ierr; 1187 const PetscInt *r; 1188 PetscInt nz,*vj,k,idx; 1189 MatScalar *aa=a->a,*v,*diag; 1190 PetscScalar *x,*b,x0,x1,x2,*t,*tp; 1191 1192 PetscFunctionBegin; 1193 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1194 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1195 t = a->solve_work; 1196 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1197 1198 /* solve U^T * D * y = b by forward substitution */ 1199 tp = t; 1200 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1201 idx = 3*r[k]; 1202 tp[0] = b[idx]; 1203 tp[1] = b[idx+1]; 1204 tp[2] = b[idx+2]; 1205 tp += 3; 1206 } 1207 1208 for (k=0; k<mbs; k++){ 1209 v = aa + 9*ai[k]; 1210 vj = aj + ai[k]; 1211 tp = t + k*3; 1212 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; 1213 nz = ai[k+1] - ai[k]; 1214 1215 tp = t + (*vj)*3; 1216 while (nz--) { 1217 tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1218 tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1219 tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1220 vj++; tp = t + (*vj)*3; 1221 v += 9; 1222 } 1223 1224 /* xk = inv(Dk)*(Dk*xk) */ 1225 diag = aa+k*9; /* ptr to inv(Dk) */ 1226 tp = t + k*3; 1227 tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1228 tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1229 tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1230 } 1231 1232 /* solve U*x = y by back substitution */ 1233 for (k=mbs-1; k>=0; k--){ 1234 v = aa + 9*ai[k]; 1235 vj = aj + ai[k]; 1236 tp = t + k*3; 1237 x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */ 1238 nz = ai[k+1] - ai[k]; 1239 1240 tp = t + (*vj)*3; 1241 while (nz--) { 1242 /* xk += U(k,:)*x(:) */ 1243 x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2]; 1244 x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2]; 1245 x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2]; 1246 vj++; tp = t + (*vj)*3; 1247 v += 9; 1248 } 1249 tp = t + k*3; 1250 tp[0] = x0; tp[1] = x1; tp[2] = x2; 1251 idx = 3*r[k]; 1252 x[idx] = x0; 1253 x[idx+1] = x1; 1254 x[idx+2] = x2; 1255 } 1256 1257 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1258 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1259 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1260 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 1261 PetscFunctionReturn(0); 1262 } 1263 1264 #undef __FUNCT__ 1265 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1266 PetscErrorCode ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1267 { 1268 MatScalar *v,*diag; 1269 PetscScalar *xp,x0,x1,x2; 1270 PetscInt nz,*vj,k; 1271 1272 PetscFunctionBegin; 1273 for (k=0; k<mbs; k++){ 1274 v = aa + 9*ai[k]; 1275 xp = x + k*3; 1276 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */ 1277 nz = ai[k+1] - ai[k]; 1278 vj = aj + ai[k]; 1279 xp = x + (*vj)*3; 1280 while (nz--) { 1281 /* x(:) += U(k,:)^T*(Dk*xk) */ 1282 xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2; 1283 xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2; 1284 xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2; 1285 vj++; xp = x + (*vj)*3; 1286 v += 9; 1287 } 1288 /* xk = inv(Dk)*(Dk*xk) */ 1289 diag = aa+k*9; /* ptr to inv(Dk) */ 1290 xp = x + k*3; 1291 xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 1292 xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 1293 xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 1294 } 1295 PetscFunctionReturn(0); 1296 } 1297 1298 #undef __FUNCT__ 1299 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private" 1300 PetscErrorCode BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1301 { 1302 MatScalar *v; 1303 PetscScalar *xp,x0,x1,x2; 1304 PetscInt nz,*vj,k; 1305 1306 PetscFunctionBegin; 1307 for (k=mbs-1; k>=0; k--){ 1308 v = aa + 9*ai[k]; 1309 xp = x + k*3; 1310 x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */ 1311 nz = ai[k+1] - ai[k]; 1312 vj = aj + ai[k]; 1313 xp = x + (*vj)*3; 1314 while (nz--) { 1315 /* xk += U(k,:)*x(:) */ 1316 x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2]; 1317 x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2]; 1318 x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2]; 1319 vj++; 1320 v += 9; xp = x + (*vj)*3; 1321 } 1322 xp = x + k*3; 1323 xp[0] = x0; xp[1] = x1; xp[2] = x2; 1324 } 1325 PetscFunctionReturn(0); 1326 } 1327 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "MatSolve_SeqSBAIJ_3_NaturalOrdering_inplace" 1330 PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1331 { 1332 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1333 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 1334 MatScalar *aa=a->a; 1335 PetscScalar *x,*b; 1336 PetscErrorCode ierr; 1337 1338 PetscFunctionBegin; 1339 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1340 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1341 1342 /* solve U^T * D * y = b by forward substitution */ 1343 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1344 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1345 1346 /* solve U*x = y by back substitution */ 1347 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1348 1349 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1350 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1351 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 1352 PetscFunctionReturn(0); 1353 } 1354 1355 #undef __FUNCT__ 1356 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace" 1357 PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1358 { 1359 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1360 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 1361 MatScalar *aa=a->a; 1362 PetscScalar *x,*b; 1363 PetscErrorCode ierr; 1364 1365 PetscFunctionBegin; 1366 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1367 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1368 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1369 ierr = ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1370 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1371 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1372 ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); 1373 PetscFunctionReturn(0); 1374 } 1375 1376 #undef __FUNCT__ 1377 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace" 1378 PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1379 { 1380 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1381 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; 1382 MatScalar *aa=a->a; 1383 PetscScalar *x,*b; 1384 PetscErrorCode ierr; 1385 1386 PetscFunctionBegin; 1387 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1388 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1389 ierr = PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1390 ierr = BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1391 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1392 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1393 ierr = PetscLogFlops(2.0*bs2*(a->nz-mbs));CHKERRQ(ierr); 1394 PetscFunctionReturn(0); 1395 } 1396 1397 #undef __FUNCT__ 1398 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_inplace" 1399 PetscErrorCode MatSolve_SeqSBAIJ_2_inplace(Mat A,Vec bb,Vec xx) 1400 { 1401 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data; 1402 IS isrow=a->row; 1403 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 1404 PetscErrorCode ierr; 1405 const PetscInt *r; 1406 PetscInt nz,*vj,k,k2,idx; 1407 MatScalar *aa=a->a,*v,*diag; 1408 PetscScalar *x,*b,x0,x1,*t; 1409 1410 PetscFunctionBegin; 1411 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1412 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1413 t = a->solve_work; 1414 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1415 1416 /* solve U^T * D * y = perm(b) by forward substitution */ 1417 for (k=0; k<mbs; k++) { /* t <- perm(b) */ 1418 idx = 2*r[k]; 1419 t[k*2] = b[idx]; 1420 t[k*2+1] = b[idx+1]; 1421 } 1422 for (k=0; k<mbs; k++){ 1423 v = aa + 4*ai[k]; 1424 vj = aj + ai[k]; 1425 k2 = k*2; 1426 x0 = t[k2]; x1 = t[k2+1]; 1427 nz = ai[k+1] - ai[k]; 1428 while (nz--) { 1429 t[(*vj)*2] += v[0]*x0 + v[1]*x1; 1430 t[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1431 vj++; v += 4; 1432 } 1433 diag = aa+k*4; /* ptr to inv(Dk) */ 1434 t[k2] = diag[0]*x0 + diag[2]*x1; 1435 t[k2+1] = diag[1]*x0 + diag[3]*x1; 1436 } 1437 1438 /* solve U*x = y by back substitution */ 1439 for (k=mbs-1; k>=0; k--){ 1440 v = aa + 4*ai[k]; 1441 vj = aj + ai[k]; 1442 k2 = k*2; 1443 x0 = t[k2]; x1 = t[k2+1]; 1444 nz = ai[k+1] - ai[k]; 1445 while (nz--) { 1446 x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1]; 1447 x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1]; 1448 vj++; v += 4; 1449 } 1450 t[k2] = x0; 1451 t[k2+1] = x1; 1452 idx = 2*r[k]; 1453 x[idx] = x0; 1454 x[idx+1] = x1; 1455 } 1456 1457 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1458 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1459 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1460 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #undef __FUNCT__ 1465 #define __FUNCT__ "ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1466 PetscErrorCode ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1467 { 1468 MatScalar *v,*diag; 1469 PetscScalar x0,x1; 1470 PetscInt nz,*vj,k,k2; 1471 1472 PetscFunctionBegin; 1473 for (k=0; k<mbs; k++){ 1474 v = aa + 4*ai[k]; 1475 vj = aj + ai[k]; 1476 k2 = k*2; 1477 x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */ 1478 nz = ai[k+1] - ai[k]; 1479 1480 while (nz--) { 1481 /* x(:) += U(k,:)^T*(Dk*xk) */ 1482 x[(*vj)*2] += v[0]*x0 + v[1]*x1; 1483 x[(*vj)*2+1] += v[2]*x0 + v[3]*x1; 1484 vj++; v += 4; 1485 } 1486 /* xk = inv(Dk)*(Dk*xk) */ 1487 diag = aa+k*4; /* ptr to inv(Dk) */ 1488 x[k2] = diag[0]*x0 + diag[2]*x1; 1489 x[k2+1] = diag[1]*x0 + diag[3]*x1; 1490 } 1491 PetscFunctionReturn(0); 1492 } 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private" 1496 PetscErrorCode BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x) 1497 { 1498 MatScalar *v; 1499 PetscScalar x0,x1; 1500 PetscInt nz,*vj,k,k2; 1501 1502 PetscFunctionBegin; 1503 for (k=mbs-1; k>=0; k--){ 1504 v = aa + 4*ai[k]; 1505 vj = aj + ai[k]; 1506 k2 = k*2; 1507 x0 = x[k2]; x1 = x[k2+1]; /* xk */ 1508 nz = ai[k+1] - ai[k]; 1509 while (nz--) { 1510 /* xk += U(k,:)*x(:) */ 1511 x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1]; 1512 x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1]; 1513 vj++; v += 4; 1514 } 1515 x[k2] = x0; 1516 x[k2+1] = x1; 1517 } 1518 PetscFunctionReturn(0); 1519 } 1520 1521 #undef __FUNCT__ 1522 #define __FUNCT__ "MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace" 1523 PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1524 { 1525 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1526 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 1527 MatScalar *aa=a->a; 1528 PetscScalar *x,*b; 1529 PetscErrorCode ierr; 1530 1531 PetscFunctionBegin; 1532 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1533 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1534 1535 /* solve U^T * D * y = b by forward substitution */ 1536 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1537 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1538 1539 /* solve U*x = y by back substitution */ 1540 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1541 1542 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1543 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1544 ierr = PetscLogFlops(4.0*bs2*a->nz - (bs+2.0*bs2)*mbs);CHKERRQ(ierr); 1545 PetscFunctionReturn(0); 1546 } 1547 1548 #undef __FUNCT__ 1549 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace" 1550 PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1551 { 1552 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1553 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs=A->rmap->bs,bs2=a->bs2; 1554 MatScalar *aa=a->a; 1555 PetscScalar *x,*b; 1556 PetscErrorCode ierr; 1557 1558 PetscFunctionBegin; 1559 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1560 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1561 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1562 ierr = ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1563 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1564 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1565 ierr = PetscLogFlops(2.0*bs2*a->nz - bs*mbs);CHKERRQ(ierr); 1566 PetscFunctionReturn(0); 1567 } 1568 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace" 1571 PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1572 { 1573 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1574 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,bs2=a->bs2; 1575 MatScalar *aa=a->a; 1576 PetscScalar *x,*b; 1577 PetscErrorCode ierr; 1578 1579 PetscFunctionBegin; 1580 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1581 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1582 ierr = PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1583 ierr = BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);CHKERRQ(ierr); 1584 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1585 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1586 ierr = PetscLogFlops(2.0*bs2*(a->nz - mbs));CHKERRQ(ierr); 1587 PetscFunctionReturn(0); 1588 } 1589 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatSolve_SeqSBAIJ_1" 1592 PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1593 { 1594 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1595 IS isrow=a->row; 1596 PetscErrorCode ierr; 1597 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj,*adiag = a->diag; 1598 const MatScalar *aa=a->a,*v; 1599 const PetscScalar *b; 1600 PetscScalar *x,xk,*t; 1601 PetscInt nz,k,j; 1602 1603 PetscFunctionBegin; 1604 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1605 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1606 t = a->solve_work; 1607 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1608 1609 /* solve U^T*D*y = perm(b) by forward substitution */ 1610 for (k=0; k<mbs; k++) t[k] = b[rp[k]]; 1611 for (k=0; k<mbs; k++){ 1612 v = aa + ai[k]; 1613 vj = aj + ai[k]; 1614 xk = t[k]; 1615 nz = ai[k+1] - ai[k] - 1; 1616 for (j=0; j<nz; j++) t[vj[j]] += v[j]*xk; 1617 t[k] = xk*v[nz]; /* v[nz] = 1/D(k) */ 1618 } 1619 1620 /* solve U*perm(x) = y by back substitution */ 1621 for (k=mbs-1; k>=0; k--){ 1622 v = aa + adiag[k] - 1; 1623 vj = aj + adiag[k] - 1; 1624 nz = ai[k+1] - ai[k] - 1; 1625 for (j=0; j<nz; j++) t[k] += v[-j]*t[vj[-j]]; 1626 x[rp[k]] = t[k]; 1627 } 1628 1629 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1630 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1631 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1632 ierr = PetscLogFlops(4.0*a->nz - 3.0*mbs);CHKERRQ(ierr); 1633 PetscFunctionReturn(0); 1634 } 1635 1636 #undef __FUNCT__ 1637 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_inplace" 1638 PetscErrorCode MatSolve_SeqSBAIJ_1_inplace(Mat A,Vec bb,Vec xx) 1639 { 1640 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1641 IS isrow=a->row; 1642 PetscErrorCode ierr; 1643 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1644 const MatScalar *aa=a->a,*v; 1645 PetscScalar *x,*b,xk,*t; 1646 PetscInt nz,k; 1647 1648 PetscFunctionBegin; 1649 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1650 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1651 t = a->solve_work; 1652 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1653 1654 /* solve U^T*D*y = perm(b) by forward substitution */ 1655 for (k=0; k<mbs; k++) t[k] = b[rp[k]]; 1656 for (k=0; k<mbs; k++){ 1657 v = aa + ai[k] + 1; 1658 vj = aj + ai[k] + 1; 1659 xk = t[k]; 1660 nz = ai[k+1] - ai[k] - 1; 1661 while (nz--) t[*vj++] += (*v++) * xk; 1662 t[k] = xk*aa[ai[k]]; /* aa[k] = 1/D(k) */ 1663 } 1664 1665 /* solve U*perm(x) = y by back substitution */ 1666 for (k=mbs-1; k>=0; k--){ 1667 v = aa + ai[k] + 1; 1668 vj = aj + ai[k] + 1; 1669 nz = ai[k+1] - ai[k] - 1; 1670 while (nz--) t[k] += (*v++) * t[*vj++]; 1671 x[rp[k]] = t[k]; 1672 } 1673 1674 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1675 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1676 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1677 ierr = PetscLogFlops(4.0*a->nz - 3*mbs);CHKERRQ(ierr); 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1" 1683 PetscErrorCode MatForwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1684 { 1685 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1686 IS isrow=a->row; 1687 PetscErrorCode ierr; 1688 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj,*adiag = a->diag; 1689 const MatScalar *aa=a->a,*v; 1690 PetscReal diagk; 1691 PetscScalar *x,*b,xk; 1692 PetscInt nz,k; 1693 1694 PetscFunctionBegin; 1695 /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */ 1696 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1697 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1698 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1699 1700 for (k=0; k<mbs; k++) x[k] = b[rp[k]]; 1701 for (k=0; k<mbs; k++){ 1702 v = aa + ai[k]; 1703 vj = aj + ai[k]; 1704 xk = x[k]; 1705 nz = ai[k+1] - ai[k] - 1; 1706 while (nz--) x[*vj++] += (*v++) * xk; 1707 1708 diagk = PetscRealPart(aa[adiag[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1709 if (PetscImaginaryPart(aa[adiag[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1710 x[k] = xk*sqrt(diagk); 1711 } 1712 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1713 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1714 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1715 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); 1716 PetscFunctionReturn(0); 1717 } 1718 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_inplace" 1721 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_inplace(Mat A,Vec bb,Vec xx) 1722 { 1723 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1724 IS isrow=a->row; 1725 PetscErrorCode ierr; 1726 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1727 const MatScalar *aa=a->a,*v; 1728 PetscReal diagk; 1729 PetscScalar *x,*b,xk; 1730 PetscInt nz,k; 1731 1732 PetscFunctionBegin; 1733 /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */ 1734 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1735 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1736 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1737 1738 for (k=0; k<mbs; k++) x[k] = b[rp[k]]; 1739 for (k=0; k<mbs; k++){ 1740 v = aa + ai[k] + 1; 1741 vj = aj + ai[k] + 1; 1742 xk = x[k]; 1743 nz = ai[k+1] - ai[k] - 1; 1744 while (nz--) x[*vj++] += (*v++) * xk; 1745 1746 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 1747 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1748 x[k] = xk*sqrt(diagk); 1749 } 1750 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1751 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1752 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1753 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); 1754 PetscFunctionReturn(0); 1755 } 1756 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1" 1759 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx) 1760 { 1761 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1762 IS isrow=a->row; 1763 PetscErrorCode ierr; 1764 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj,*adiag = a->diag; 1765 const MatScalar *aa=a->a,*v; 1766 PetscReal diagk; 1767 PetscScalar *x,*b,*t; 1768 PetscInt nz,k; 1769 1770 PetscFunctionBegin; 1771 /* solve D^(1/2)*U*perm(x) = b by back substitution */ 1772 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1773 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1774 t = a->solve_work; 1775 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1776 1777 for (k=mbs-1; k>=0; k--){ 1778 v = aa + ai[k]; 1779 vj = aj + ai[k]; 1780 diagk = PetscRealPart(aa[adiag[k]]); 1781 if (PetscImaginaryPart(aa[adiag[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1782 t[k] = b[k] * sqrt(diagk); 1783 nz = ai[k+1] - ai[k] - 1; 1784 while (nz--) t[k] += (*v++) * t[*vj++]; 1785 x[rp[k]] = t[k]; 1786 } 1787 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1788 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1789 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1790 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); 1791 PetscFunctionReturn(0); 1792 } 1793 1794 #undef __FUNCT__ 1795 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_inplace" 1796 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_inplace(Mat A,Vec bb,Vec xx) 1797 { 1798 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1799 IS isrow=a->row; 1800 PetscErrorCode ierr; 1801 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,*vj; 1802 const MatScalar *aa=a->a,*v; 1803 PetscReal diagk; 1804 PetscScalar *x,*b,*t; 1805 PetscInt nz,k; 1806 1807 PetscFunctionBegin; 1808 /* solve D^(1/2)*U*perm(x) = b by back substitution */ 1809 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1810 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1811 t = a->solve_work; 1812 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1813 1814 for (k=mbs-1; k>=0; k--){ 1815 v = aa + ai[k] + 1; 1816 vj = aj + ai[k] + 1; 1817 diagk = PetscRealPart(aa[ai[k]]); 1818 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 1819 t[k] = b[k] * sqrt(diagk); 1820 nz = ai[k+1] - ai[k] - 1; 1821 while (nz--) t[k] += (*v++) * t[*vj++]; 1822 x[rp[k]] = t[k]; 1823 } 1824 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1825 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1826 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1827 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); 1828 PetscFunctionReturn(0); 1829 } 1830 1831 #undef __FUNCT__ 1832 #define __FUNCT__ "MatSolves_SeqSBAIJ_1" 1833 PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx) 1834 { 1835 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1836 PetscErrorCode ierr; 1837 1838 PetscFunctionBegin; 1839 if (A->rmap->bs == 1) { 1840 ierr = MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);CHKERRQ(ierr); 1841 } else { 1842 IS isrow=a->row; 1843 const PetscInt *vj,mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; 1844 const MatScalar *aa=a->a,*v; 1845 PetscScalar *x,*b,*t; 1846 PetscInt nz,k,n,i,j; 1847 if (bb->n > a->solves_work_n) { 1848 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 1849 ierr = PetscMalloc(bb->n*A->rmap->N*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1850 a->solves_work_n = bb->n; 1851 } 1852 n = bb->n; 1853 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1854 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1855 t = a->solves_work; 1856 1857 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1858 1859 /* solve U^T*D*y = perm(b) by forward substitution */ 1860 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs];} /* values are stored interlaced in t */ 1861 for (k=0; k<mbs; k++){ 1862 v = aa + ai[k]; 1863 vj = aj + ai[k]; 1864 nz = ai[k+1] - ai[k] - 1; 1865 for (j=0; j<nz; j++){ 1866 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1867 v++;vj++; 1868 } 1869 for (i=0; i<n; i++) t[n*k+i] *= aa[nz]; /* note: aa[nz] = 1/D(k) */ 1870 } 1871 1872 /* solve U*perm(x) = y by back substitution */ 1873 for (k=mbs-1; k>=0; k--){ 1874 v = aa + ai[k] - 1; 1875 vj = aj + ai[k] - 1; 1876 nz = ai[k+1] - ai[k] - 1; 1877 for (j=0; j<nz; j++){ 1878 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1879 v++;vj++; 1880 } 1881 for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i]; 1882 } 1883 1884 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1885 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1886 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1887 ierr = PetscLogFlops(bb->n*(4.0*a->nz - 3.0*mbs));CHKERRQ(ierr); 1888 } 1889 PetscFunctionReturn(0); 1890 } 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "MatSolves_SeqSBAIJ_1_inplace" 1894 PetscErrorCode MatSolves_SeqSBAIJ_1_inplace(Mat A,Vecs bb,Vecs xx) 1895 { 1896 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1897 PetscErrorCode ierr; 1898 1899 PetscFunctionBegin; 1900 if (A->rmap->bs == 1) { 1901 ierr = MatSolve_SeqSBAIJ_1_inplace(A,bb->v,xx->v);CHKERRQ(ierr); 1902 } else { 1903 IS isrow=a->row; 1904 const PetscInt *vj,mbs=a->mbs,*ai=a->i,*aj=a->j,*rp; 1905 const MatScalar *aa=a->a,*v; 1906 PetscScalar *x,*b,*t; 1907 PetscInt nz,k,n,i; 1908 if (bb->n > a->solves_work_n) { 1909 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 1910 ierr = PetscMalloc(bb->n*A->rmap->N*sizeof(PetscScalar),&a->solves_work);CHKERRQ(ierr); 1911 a->solves_work_n = bb->n; 1912 } 1913 n = bb->n; 1914 ierr = VecGetArray(bb->v,&b);CHKERRQ(ierr); 1915 ierr = VecGetArray(xx->v,&x);CHKERRQ(ierr); 1916 t = a->solves_work; 1917 1918 ierr = ISGetIndices(isrow,&rp);CHKERRQ(ierr); 1919 1920 /* solve U^T*D*y = perm(b) by forward substitution */ 1921 for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs];} /* values are stored interlaced in t */ 1922 for (k=0; k<mbs; k++){ 1923 v = aa + ai[k]; 1924 vj = aj + ai[k]; 1925 nz = ai[k+1] - ai[k]; 1926 while (nz--) { 1927 for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i]; 1928 v++;vj++; 1929 } 1930 for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */ 1931 } 1932 1933 /* solve U*perm(x) = y by back substitution */ 1934 for (k=mbs-1; k>=0; k--){ 1935 v = aa + ai[k]; 1936 vj = aj + ai[k]; 1937 nz = ai[k+1] - ai[k]; 1938 while (nz--) { 1939 for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i]; 1940 v++;vj++; 1941 } 1942 for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i]; 1943 } 1944 1945 ierr = ISRestoreIndices(isrow,&rp);CHKERRQ(ierr); 1946 ierr = VecRestoreArray(bb->v,&b);CHKERRQ(ierr); 1947 ierr = VecRestoreArray(xx->v,&x);CHKERRQ(ierr); 1948 ierr = PetscLogFlops(bb->n*(4.0*a->nz - 3.0*mbs));CHKERRQ(ierr); 1949 } 1950 PetscFunctionReturn(0); 1951 } 1952 1953 #undef __FUNCT__ 1954 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering" 1955 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1956 { 1957 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1958 PetscErrorCode ierr; 1959 const PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*vj,*adiag = a->diag; 1960 const MatScalar *aa=a->a,*v; 1961 const PetscScalar *b; 1962 PetscScalar *x,xi; 1963 PetscInt nz,i,j; 1964 1965 PetscFunctionBegin; 1966 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1967 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1968 1969 /* solve U^T*D*y = b by forward substitution */ 1970 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1971 for (i=0; i<mbs; i++){ 1972 v = aa + ai[i]; 1973 vj = aj + ai[i]; 1974 xi = x[i]; 1975 nz = ai[i+1] - ai[i] - 1; /* exclude diag[i] */ 1976 for (j=0; j<nz; j++){ 1977 x[vj[j]] += v[j]* xi; 1978 } 1979 x[i] = xi*v[nz]; /* v[nz] = aa[diag[i]] = 1/D(i) */ 1980 } 1981 1982 /* solve U*x = y by backward substitution */ 1983 for (i=mbs-2; i>=0; i--){ 1984 xi = x[i]; 1985 v = aa + adiag[i] - 1; /* end of row i, excluding diag */ 1986 vj = aj + adiag[i] - 1; 1987 nz = ai[i+1] - ai[i] - 1; 1988 for (j=0; j<nz; j++) xi += v[-j]*x[vj[-j]]; 1989 x[i] = xi; 1990 } 1991 1992 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1993 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1994 ierr = PetscLogFlops(4.0*a->nz - 3*mbs);CHKERRQ(ierr); 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace" 2000 PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 2001 { 2002 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 2003 PetscErrorCode ierr; 2004 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 2005 MatScalar *aa=a->a,*v; 2006 PetscScalar *x,*b,xk; 2007 PetscInt nz,*vj,k; 2008 2009 PetscFunctionBegin; 2010 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2011 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2012 2013 /* solve U^T*D*y = b by forward substitution */ 2014 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 2015 for (k=0; k<mbs; k++){ 2016 v = aa + ai[k] + 1; 2017 vj = aj + ai[k] + 1; 2018 xk = x[k]; 2019 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 2020 while (nz--) x[*vj++] += (*v++) * xk; 2021 x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */ 2022 } 2023 2024 /* solve U*x = y by back substitution */ 2025 for (k=mbs-2; k>=0; k--){ 2026 v = aa + ai[k] + 1; 2027 vj = aj + ai[k] + 1; 2028 xk = x[k]; 2029 nz = ai[k+1] - ai[k] - 1; 2030 while (nz--) { 2031 xk += (*v++) * x[*vj++]; 2032 } 2033 x[k] = xk; 2034 } 2035 2036 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2037 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2038 ierr = PetscLogFlops(4.0*a->nz - 3*mbs);CHKERRQ(ierr); 2039 PetscFunctionReturn(0); 2040 } 2041 2042 #undef __FUNCT__ 2043 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering" 2044 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 2045 { 2046 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 2047 PetscErrorCode ierr; 2048 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*adiag = a->diag; 2049 const MatScalar *aa=a->a,*v; 2050 PetscReal diagk; 2051 PetscScalar *x,*b; 2052 PetscInt nz,*vj,k; 2053 2054 PetscFunctionBegin; 2055 /* solve U^T*D^(1/2)*x = b by forward substitution */ 2056 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2057 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2058 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 2059 for (k=0; k<mbs; k++){ 2060 v = aa + ai[k]; 2061 vj = aj + ai[k]; 2062 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 2063 while (nz--) x[*vj++] += (*v++) * x[k]; 2064 diagk = PetscRealPart(aa[adiag[k]]); /* note: aa[adiag[k]] = 1/D(k) */ 2065 if (PetscImaginaryPart(aa[adiag[k]]) || diagk < 0) SETERRQ2(PETSC_ERR_SUP,"Diagonal (%g,%g) must be real and nonnegative",PetscRealPart(aa[adiag[k]]),PetscImaginaryPart(aa[adiag[k]])); 2066 x[k] *= sqrt(diagk); 2067 } 2068 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2069 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2070 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); 2071 PetscFunctionReturn(0); 2072 } 2073 2074 #undef __FUNCT__ 2075 #define __FUNCT__ "MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace" 2076 PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 2077 { 2078 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 2079 PetscErrorCode ierr; 2080 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 2081 MatScalar *aa=a->a,*v; 2082 PetscReal diagk; 2083 PetscScalar *x,*b; 2084 PetscInt nz,*vj,k; 2085 2086 PetscFunctionBegin; 2087 /* solve U^T*D^(1/2)*x = b by forward substitution */ 2088 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2089 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2090 ierr = PetscMemcpy(x,b,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 2091 for (k=0; k<mbs; k++){ 2092 v = aa + ai[k] + 1; 2093 vj = aj + ai[k] + 1; 2094 nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */ 2095 while (nz--) x[*vj++] += (*v++) * x[k]; 2096 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 2097 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ2(PETSC_ERR_SUP,"Diagonal (%g,%g) must be real and nonnegative",PetscRealPart(aa[ai[k]]),PetscImaginaryPart(aa[ai[k]])); 2098 x[k] *= sqrt(diagk); 2099 } 2100 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2101 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2102 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); 2103 PetscFunctionReturn(0); 2104 } 2105 2106 #undef __FUNCT__ 2107 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering" 2108 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 2109 { 2110 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 2111 PetscErrorCode ierr; 2112 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*adiag = a->diag; 2113 MatScalar *aa=a->a,*v; 2114 PetscReal diagk; 2115 PetscScalar *x,*b; 2116 PetscInt nz,*vj,k; 2117 2118 PetscFunctionBegin; 2119 /* solve D^(1/2)*U*x = b by back substitution */ 2120 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2121 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2122 2123 for (k=mbs-1; k>=0; k--){ 2124 v = aa + ai[k]; 2125 vj = aj + ai[k]; 2126 diagk = PetscRealPart(aa[adiag[k]]); /* note: aa[diag[k]] = 1/D(k) */ 2127 if (PetscImaginaryPart(aa[adiag[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 2128 x[k] = sqrt(diagk)*b[k]; 2129 nz = ai[k+1] - ai[k] - 1; 2130 while (nz--) x[k] += (*v++) * x[*vj++]; 2131 } 2132 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2133 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2134 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); 2135 PetscFunctionReturn(0); 2136 } 2137 2138 #undef __FUNCT__ 2139 #define __FUNCT__ "MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace" 2140 PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 2141 { 2142 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 2143 PetscErrorCode ierr; 2144 PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j; 2145 MatScalar *aa=a->a,*v; 2146 PetscReal diagk; 2147 PetscScalar *x,*b; 2148 PetscInt nz,*vj,k; 2149 2150 PetscFunctionBegin; 2151 /* solve D^(1/2)*U*x = b by back substitution */ 2152 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2153 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2154 2155 for (k=mbs-1; k>=0; k--){ 2156 v = aa + ai[k] + 1; 2157 vj = aj + ai[k] + 1; 2158 diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */ 2159 if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative"); 2160 x[k] = sqrt(diagk)*b[k]; 2161 nz = ai[k+1] - ai[k] - 1; 2162 while (nz--) x[k] += (*v++) * x[*vj++]; 2163 } 2164 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2165 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2166 ierr = PetscLogFlops(2.0*a->nz - mbs);CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 2171 #undef __FUNCT__ 2172 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_MSR" 2173 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat B,Mat A,IS perm,const MatFactorInfo *info) 2174 { 2175 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 2176 PetscErrorCode ierr; 2177 const PetscInt *rip,mbs = a->mbs,*ai = a->i,*aj = a->j; 2178 PetscInt *jutmp,bs = A->rmap->bs,bs2=a->bs2,i; 2179 PetscInt m,reallocs = 0,*levtmp; 2180 PetscInt *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 2181 PetscInt incrlev,*lev,shift,prow,nz; 2182 PetscReal f = info->fill,levels = info->levels; 2183 PetscTruth perm_identity; 2184 2185 PetscFunctionBegin; 2186 /* check whether perm is the identity mapping */ 2187 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2188 2189 if (perm_identity){ 2190 a->permute = PETSC_FALSE; 2191 ai = a->i; aj = a->j; 2192 } else { /* non-trivial permutation */ 2193 SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format"); 2194 a->permute = PETSC_TRUE; 2195 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 2196 ai = a->inew; aj = a->jnew; 2197 } 2198 2199 /* initialization */ 2200 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2201 umax = (PetscInt)(f*ai[mbs] + 1); 2202 ierr = PetscMalloc(umax*sizeof(PetscInt),&lev);CHKERRQ(ierr); 2203 umax += mbs + 1; 2204 shift = mbs + 1; 2205 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 2206 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 2207 iu[0] = mbs + 1; 2208 juidx = mbs + 1; 2209 /* prowl: linked list for pivot row */ 2210 ierr = PetscMalloc3(mbs,PetscInt,&prowl,mbs,PetscInt,&q,mbs,PetscInt,&levtmp);CHKERRQ(ierr); 2211 /* q: linked list for col index */ 2212 2213 for (i=0; i<mbs; i++){ 2214 prowl[i] = mbs; 2215 q[i] = 0; 2216 } 2217 2218 /* for each row k */ 2219 for (k=0; k<mbs; k++){ 2220 nzk = 0; 2221 q[k] = mbs; 2222 /* copy current row into linked list */ 2223 nz = ai[rip[k]+1] - ai[rip[k]]; 2224 j = ai[rip[k]]; 2225 while (nz--){ 2226 vj = rip[aj[j++]]; 2227 if (vj > k){ 2228 qm = k; 2229 do { 2230 m = qm; qm = q[m]; 2231 } while(qm < vj); 2232 if (qm == vj) { 2233 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 2234 } 2235 nzk++; 2236 q[m] = vj; 2237 q[vj] = qm; 2238 levtmp[vj] = 0; /* initialize lev for nonzero element */ 2239 } 2240 } 2241 2242 /* modify nonzero structure of k-th row by computing fill-in 2243 for each row prow to be merged in */ 2244 prow = k; 2245 prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */ 2246 2247 while (prow < k){ 2248 /* merge row prow into k-th row */ 2249 jmin = iu[prow] + 1; 2250 jmax = iu[prow+1]; 2251 qm = k; 2252 for (j=jmin; j<jmax; j++){ 2253 incrlev = lev[j-shift] + 1; 2254 if (incrlev > levels) continue; 2255 vj = ju[j]; 2256 do { 2257 m = qm; qm = q[m]; 2258 } while (qm < vj); 2259 if (qm != vj){ /* a fill */ 2260 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 2261 levtmp[vj] = incrlev; 2262 } else { 2263 if (levtmp[vj] > incrlev) levtmp[vj] = incrlev; 2264 } 2265 } 2266 prow = prowl[prow]; /* next pivot row */ 2267 } 2268 2269 /* add k to row list for first nonzero element in k-th row */ 2270 if (nzk > 1){ 2271 i = q[k]; /* col value of first nonzero element in k_th row of U */ 2272 prowl[k] = prowl[i]; prowl[i] = k; 2273 } 2274 iu[k+1] = iu[k] + nzk; 2275 2276 /* allocate more space to ju and lev if needed */ 2277 if (iu[k+1] > umax) { 2278 /* estimate how much additional space we will need */ 2279 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 2280 /* just double the memory each time */ 2281 maxadd = umax; 2282 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 2283 umax += maxadd; 2284 2285 /* allocate a longer ju */ 2286 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 2287 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 2288 ierr = PetscFree(ju);CHKERRQ(ierr); 2289 ju = jutmp; 2290 2291 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 2292 ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));CHKERRQ(ierr); 2293 ierr = PetscFree(lev);CHKERRQ(ierr); 2294 lev = jutmp; 2295 reallocs += 2; /* count how many times we realloc */ 2296 } 2297 2298 /* save nonzero structure of k-th row in ju */ 2299 i=k; 2300 while (nzk --) { 2301 i = q[i]; 2302 ju[juidx] = i; 2303 lev[juidx-shift] = levtmp[i]; 2304 juidx++; 2305 } 2306 } 2307 2308 #if defined(PETSC_USE_INFO) 2309 if (ai[mbs] != 0) { 2310 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2311 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2312 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2313 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 2314 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2315 } else { 2316 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2317 } 2318 #endif 2319 2320 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2321 ierr = PetscFree3(prowl,q,levtmp);CHKERRQ(ierr); 2322 ierr = PetscFree(lev);CHKERRQ(ierr); 2323 2324 /* put together the new matrix */ 2325 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,PETSC_NULL);CHKERRQ(ierr); 2326 2327 /* ierr = PetscLogObjectParent(B,iperm);CHKERRQ(ierr); */ 2328 b = (Mat_SeqSBAIJ*)(B)->data; 2329 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2330 b->singlemalloc = PETSC_FALSE; 2331 b->free_a = PETSC_TRUE; 2332 b->free_ij = PETSC_TRUE; 2333 /* the next line frees the default space generated by the Create() */ 2334 ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr); 2335 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 2336 b->j = ju; 2337 b->i = iu; 2338 b->diag = 0; 2339 b->ilen = 0; 2340 b->imax = 0; 2341 2342 if (b->row) { 2343 ierr = ISDestroy(b->row);CHKERRQ(ierr); 2344 } 2345 if (b->icol) { 2346 ierr = ISDestroy(b->icol);CHKERRQ(ierr); 2347 } 2348 b->row = perm; 2349 b->icol = perm; 2350 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2351 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2352 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2353 /* In b structure: Free imax, ilen, old a, old j. 2354 Allocate idnew, solve_work, new a, new j */ 2355 ierr = PetscLogObjectMemory(B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2356 b->maxnz = b->nz = iu[mbs]; 2357 2358 (B)->info.factor_mallocs = reallocs; 2359 (B)->info.fill_ratio_given = f; 2360 if (ai[mbs] != 0) { 2361 (B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 2362 } else { 2363 (B)->info.fill_ratio_needed = 0.0; 2364 } 2365 ierr = MatSeqSBAIJSetNumericFactorization_inplace(B,perm_identity);CHKERRQ(ierr); 2366 PetscFunctionReturn(0); 2367 } 2368 2369 /* 2370 See MatICCFactorSymbolic_SeqAIJ() for description of its data structure 2371 */ 2372 #include "petscbt.h" 2373 #include "../src/mat/utils/freespace.h" 2374 #undef __FUNCT__ 2375 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ" 2376 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2377 { 2378 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 2379 PetscErrorCode ierr; 2380 PetscTruth perm_identity,free_ij = PETSC_TRUE,missing; 2381 PetscInt bs=A->rmap->bs,am=a->mbs,d,*ai=a->i,*aj= a->j; 2382 const PetscInt *rip; 2383 PetscInt reallocs=0,i,*ui,*udiag,*cols; 2384 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2385 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*uj,**uj_ptr,**uj_lvl_ptr; 2386 PetscReal fill=info->fill,levels=info->levels; 2387 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2388 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2389 PetscBT lnkbt; 2390 2391 PetscFunctionBegin; 2392 if (bs > 1){ 2393 ierr = MatICCFactorSymbolic_SeqSBAIJ_inplace(fact,A,perm,info);CHKERRQ(ierr); 2394 PetscFunctionReturn(0); 2395 } 2396 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2397 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2398 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2399 2400 /* check whether perm is the identity mapping */ 2401 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2402 if (!perm_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format"); 2403 a->permute = PETSC_FALSE; 2404 2405 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2406 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2407 ui[0] = 0; 2408 2409 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2410 if (!levels){ 2411 /* reuse the column pointers and row offsets for memory savings */ 2412 for (i=0; i<am; i++) { 2413 ncols = ai[i+1] - ai[i]; 2414 ui[i+1] = ui[i] + ncols; 2415 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2416 } 2417 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2418 cols = uj; 2419 for (i=0; i<am; i++) { 2420 aj = a->j + ai[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2421 ncols = ai[i+1] - ai[i] -1; 2422 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2423 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2424 } 2425 } else { /* case: levels>0 */ 2426 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2427 2428 /* initialization */ 2429 /* jl: linked list for storing indices of the pivot rows 2430 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2431 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&il,am,PetscInt,&jl);CHKERRQ(ierr); 2432 for (i=0; i<am; i++){ 2433 jl[i] = am; il[i] = 0; 2434 } 2435 2436 /* create and initialize a linked list for storing column indices of the active row k */ 2437 nlnk = am + 1; 2438 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2439 2440 /* initial FreeSpace size is fill*(ai[am]+1) */ 2441 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2442 current_space = free_space; 2443 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2444 current_space_lvl = free_space_lvl; 2445 2446 for (k=0; k<am; k++){ /* for each active row k */ 2447 /* initialize lnk by the column indices of row k */ 2448 nzk = 0; 2449 ncols = ai[k+1] - ai[k]; 2450 if (!ncols) SETERRQ1(PETSC_ERR_MAT_CH_ZRPVT,"Empty row %D in matrix ",k); 2451 cols = aj+ai[k]; 2452 ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2453 nzk += nlnk; 2454 2455 /* update lnk by computing fill-in for each pivot row to be merged in */ 2456 prow = jl[k]; /* 1st pivot row */ 2457 2458 while (prow < k){ 2459 nextprow = jl[prow]; 2460 2461 /* merge prow into k-th row */ 2462 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2463 jmax = ui[prow+1]; 2464 ncols = jmax-jmin; 2465 i = jmin - ui[prow]; 2466 2467 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2468 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2469 j = *(uj - 1); 2470 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2471 nzk += nlnk; 2472 2473 /* update il and jl for prow */ 2474 if (jmin < jmax){ 2475 il[prow] = jmin; 2476 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2477 } 2478 prow = nextprow; 2479 } 2480 2481 /* if free space is not available, make more free space */ 2482 if (current_space->local_remaining<nzk) { 2483 i = am - k + 1; /* num of unfactored rows */ 2484 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2485 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2486 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2487 reallocs++; 2488 } 2489 2490 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2491 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2492 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2493 2494 /* add the k-th row into il and jl */ 2495 if (nzk > 1){ 2496 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2497 jl[k] = jl[i]; jl[i] = k; 2498 il[k] = ui[k] + 1; 2499 } 2500 uj_ptr[k] = current_space->array; 2501 uj_lvl_ptr[k] = current_space_lvl->array; 2502 2503 current_space->array += nzk; 2504 current_space->local_used += nzk; 2505 current_space->local_remaining -= nzk; 2506 current_space_lvl->array += nzk; 2507 current_space_lvl->local_used += nzk; 2508 current_space_lvl->local_remaining -= nzk; 2509 2510 ui[k+1] = ui[k] + nzk; 2511 } 2512 2513 #if defined(PETSC_USE_INFO) 2514 if (ai[am] != 0) { 2515 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2516 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2517 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2518 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2519 } else { 2520 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2521 } 2522 #endif 2523 2524 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2525 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,il,jl);CHKERRQ(ierr); 2526 2527 /* destroy list of free space and other temporary array(s) */ 2528 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2529 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2530 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2531 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2532 2533 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2534 2535 /* put together the new matrix in MATSEQSBAIJ format */ 2536 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2537 2538 b = (Mat_SeqSBAIJ*)(fact)->data; 2539 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2540 b->singlemalloc = PETSC_FALSE; 2541 b->free_a = PETSC_TRUE; 2542 b->free_ij = free_ij; 2543 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2544 b->j = uj; 2545 b->i = ui; 2546 b->diag = udiag; 2547 b->free_diag = PETSC_TRUE; 2548 b->ilen = 0; 2549 b->imax = 0; 2550 b->row = perm; 2551 b->col = perm; 2552 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2553 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2554 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2555 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2556 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2557 b->maxnz = b->nz = ui[am]; 2558 2559 fact->info.factor_mallocs = reallocs; 2560 fact->info.fill_ratio_given = fill; 2561 if (ai[am] != 0) { 2562 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2563 } else { 2564 fact->info.fill_ratio_needed = 0.0; 2565 } 2566 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 2567 PetscFunctionReturn(0); 2568 } 2569 2570 #undef __FUNCT__ 2571 #define __FUNCT__ "MatICCFactorSymbolic_SeqSBAIJ_inplace" 2572 PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2573 { 2574 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2575 Mat_SeqSBAIJ *b; 2576 PetscErrorCode ierr; 2577 PetscTruth perm_identity,free_ij = PETSC_TRUE,missing; 2578 PetscInt bs=A->rmap->bs,am=a->mbs,d; 2579 const PetscInt *cols,*rip,*ai,*aj; 2580 PetscInt reallocs=0,i,*ui; 2581 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2582 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 2583 PetscReal fill=info->fill,levels=info->levels,ratio_needed; 2584 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2585 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2586 PetscBT lnkbt; 2587 2588 PetscFunctionBegin; 2589 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2590 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2591 2592 /* 2593 This code originally uses Modified Sparse Row (MSR) storage 2594 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice! 2595 Then it is rewritten so the factor B takes seqsbaij format. However the associated 2596 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1, 2597 thus the original code in MSR format is still used for these cases. 2598 The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever 2599 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 2600 */ 2601 if (bs > 1){ 2602 ierr = MatICCFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info);CHKERRQ(ierr); 2603 PetscFunctionReturn(0); 2604 } 2605 2606 /* check whether perm is the identity mapping */ 2607 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2608 if (!perm_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format") 2609 2610 /* special case that simply copies fill pattern */ 2611 if (!levels ) { 2612 a->permute = PETSC_FALSE; 2613 /* reuse the column pointers and row offsets for memory savings */ 2614 ui = a->i; 2615 uj = a->j; 2616 free_ij = PETSC_FALSE; 2617 ratio_needed = 1.0; 2618 } else { /* case: levels>0 */ 2619 if (perm_identity){ 2620 a->permute = PETSC_FALSE; 2621 ai = a->i; aj = a->j; 2622 } 2623 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2624 2625 /* initialization */ 2626 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2627 ui[0] = 0; 2628 2629 /* jl: linked list for storing indices of the pivot rows 2630 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2631 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&il,am,PetscInt,&jl);CHKERRQ(ierr); 2632 for (i=0; i<am; i++){ 2633 jl[i] = am; il[i] = 0; 2634 } 2635 2636 /* create and initialize a linked list for storing column indices of the active row k */ 2637 nlnk = am + 1; 2638 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2639 2640 /* initial FreeSpace size is fill*(ai[am]+1) */ 2641 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2642 current_space = free_space; 2643 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2644 current_space_lvl = free_space_lvl; 2645 2646 for (k=0; k<am; k++){ /* for each active row k */ 2647 /* initialize lnk by the column indices of row rip[k] */ 2648 nzk = 0; 2649 ncols = ai[rip[k]+1] - ai[rip[k]]; 2650 cols = aj+ai[rip[k]]; 2651 ierr = PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2652 nzk += nlnk; 2653 2654 /* update lnk by computing fill-in for each pivot row to be merged in */ 2655 prow = jl[k]; /* 1st pivot row */ 2656 2657 while (prow < k){ 2658 nextprow = jl[prow]; 2659 2660 /* merge prow into k-th row */ 2661 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2662 jmax = ui[prow+1]; 2663 ncols = jmax-jmin; 2664 i = jmin - ui[prow]; 2665 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2666 j = *(uj_lvl_ptr[prow] + i - 1); 2667 cols_lvl = uj_lvl_ptr[prow]+i; 2668 ierr = PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2669 nzk += nlnk; 2670 2671 /* update il and jl for prow */ 2672 if (jmin < jmax){ 2673 il[prow] = jmin; 2674 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2675 } 2676 prow = nextprow; 2677 } 2678 2679 /* if free space is not available, make more free space */ 2680 if (current_space->local_remaining<nzk) { 2681 i = am - k + 1; /* num of unfactored rows */ 2682 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2683 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2684 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2685 reallocs++; 2686 } 2687 2688 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2689 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2690 2691 /* add the k-th row into il and jl */ 2692 if (nzk-1 > 0){ 2693 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2694 jl[k] = jl[i]; jl[i] = k; 2695 il[k] = ui[k] + 1; 2696 } 2697 uj_ptr[k] = current_space->array; 2698 uj_lvl_ptr[k] = current_space_lvl->array; 2699 2700 current_space->array += nzk; 2701 current_space->local_used += nzk; 2702 current_space->local_remaining -= nzk; 2703 current_space_lvl->array += nzk; 2704 current_space_lvl->local_used += nzk; 2705 current_space_lvl->local_remaining -= nzk; 2706 2707 ui[k+1] = ui[k] + nzk; 2708 } 2709 2710 #if defined(PETSC_USE_INFO) 2711 if (ai[am] != 0) { 2712 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2713 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2714 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2715 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2716 } else { 2717 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2718 } 2719 #endif 2720 2721 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2722 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,il,jl);CHKERRQ(ierr); 2723 2724 /* destroy list of free space and other temporary array(s) */ 2725 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2726 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2727 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2728 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2729 if (ai[am] != 0) { 2730 ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2731 } else { 2732 ratio_needed = 0.0; 2733 } 2734 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2735 2736 /* put together the new matrix in MATSEQSBAIJ format */ 2737 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2738 2739 b = (Mat_SeqSBAIJ*)(fact)->data; 2740 ierr = PetscFree2(b->imax,b->ilen);CHKERRQ(ierr); 2741 b->singlemalloc = PETSC_FALSE; 2742 b->free_a = PETSC_TRUE; 2743 b->free_ij = free_ij; 2744 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2745 b->j = uj; 2746 b->i = ui; 2747 b->diag = 0; 2748 b->ilen = 0; 2749 b->imax = 0; 2750 b->row = perm; 2751 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2752 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2753 b->icol = perm; 2754 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2755 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2756 b->maxnz = b->nz = ui[am]; 2757 2758 fact->info.factor_mallocs = reallocs; 2759 fact->info.fill_ratio_given = fill; 2760 fact->info.fill_ratio_needed = ratio_needed; 2761 if (perm_identity){ 2762 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace; 2763 } else { 2764 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace; 2765 } 2766 PetscFunctionReturn(0); 2767 } 2768 2769