1 /*$Id: baijfact2.c,v 1.29 1999/06/30 23:51:46 balay Exp bsmith $*/ 2 /* 3 Factorization code for BAIJ format. 4 */ 5 6 #include "src/mat/impls/baij/seq/baij.h" 7 #include "src/vec/vecimpl.h" 8 #include "src/inline/ilu.h" 9 #include "src/inline/dot.h" 10 11 /* ----------------------------------------------------------- */ 12 #undef __FUNC__ 13 #define __FUNC__ "MatSolve_SeqBAIJ_N" 14 int MatSolve_SeqBAIJ_N(Mat A,Vec bb,Vec xx) 15 { 16 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 17 IS iscol=a->col,isrow=a->row; 18 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j; 19 int nz,bs=a->bs,bs2=a->bs2,*rout,*cout; 20 MatScalar *aa=a->a,*v; 21 Scalar *x,*b,*sum,*tmp,*lsum; 22 23 PetscFunctionBegin; 24 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 25 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 26 tmp = a->solve_work; 27 28 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 29 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 30 31 /* forward solve the lower triangular */ 32 ierr = PetscMemcpy(tmp,b+bs*(*r++),bs*sizeof(Scalar));CHKERRQ(ierr); 33 for ( i=1; i<n; i++ ) { 34 v = aa + bs2*ai[i]; 35 vi = aj + ai[i]; 36 nz = a->diag[i] - ai[i]; 37 sum = tmp + bs*i; 38 ierr = PetscMemcpy(sum,b+bs*(*r++),bs*sizeof(Scalar));CHKERRQ(ierr); 39 while (nz--) { 40 Kernel_v_gets_v_minus_A_times_w(bs,sum,v,tmp+bs*(*vi++)); 41 v += bs2; 42 } 43 } 44 /* backward solve the upper triangular */ 45 lsum = a->solve_work + a->n; 46 for ( i=n-1; i>=0; i-- ){ 47 v = aa + bs2*(a->diag[i] + 1); 48 vi = aj + a->diag[i] + 1; 49 nz = ai[i+1] - a->diag[i] - 1; 50 ierr = PetscMemcpy(lsum,tmp+i*bs,bs*sizeof(Scalar));CHKERRQ(ierr); 51 while (nz--) { 52 Kernel_v_gets_v_minus_A_times_w(bs,lsum,v,tmp+bs*(*vi++)); 53 v += bs2; 54 } 55 Kernel_w_gets_A_times_v(bs,lsum,aa+bs2*a->diag[i],tmp+i*bs); 56 ierr = PetscMemcpy(x + bs*(*c--),tmp+i*bs,bs*sizeof(Scalar));CHKERRQ(ierr); 57 } 58 59 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 60 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 61 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 62 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 63 PLogFlops(2*(a->bs2)*(a->nz) - a->bs*a->n); 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNC__ 68 #define __FUNC__ "MatSolve_SeqBAIJ_7" 69 int MatSolve_SeqBAIJ_7(Mat A,Vec bb,Vec xx) 70 { 71 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 72 IS iscol=a->col,isrow=a->row; 73 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 74 int *diag = a->diag; 75 MatScalar *aa=a->a,*v; 76 Scalar sum1,sum2,sum3,sum4,sum5,sum6,sum7,x1,x2,x3,x4,x5,x6,x7; 77 Scalar *x,*b,*tmp; 78 79 PetscFunctionBegin; 80 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 81 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 82 tmp = a->solve_work; 83 84 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 85 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 86 87 /* forward solve the lower triangular */ 88 idx = 7*(*r++); 89 tmp[0] = b[idx]; tmp[1] = b[1+idx]; 90 tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx]; 91 tmp[5] = b[5+idx]; tmp[6] = b[6+idx]; 92 93 for ( i=1; i<n; i++ ) { 94 v = aa + 49*ai[i]; 95 vi = aj + ai[i]; 96 nz = diag[i] - ai[i]; 97 idx = 7*(*r++); 98 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 99 sum5 = b[4+idx];sum6 = b[5+idx];sum7 = b[6+idx]; 100 while (nz--) { 101 idx = 7*(*vi++); 102 x1 = tmp[idx]; x2 = tmp[1+idx];x3 = tmp[2+idx]; 103 x4 = tmp[3+idx];x5 = tmp[4+idx]; 104 x6 = tmp[5+idx];x7 = tmp[6+idx]; 105 sum1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 106 sum2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 107 sum3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 108 sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 109 sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 110 sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 111 sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 112 v += 49; 113 } 114 idx = 7*i; 115 tmp[idx] = sum1;tmp[1+idx] = sum2; 116 tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5; 117 tmp[5+idx] = sum6;tmp[6+idx] = sum7; 118 } 119 /* backward solve the upper triangular */ 120 for ( i=n-1; i>=0; i-- ){ 121 v = aa + 49*diag[i] + 49; 122 vi = aj + diag[i] + 1; 123 nz = ai[i+1] - diag[i] - 1; 124 idt = 7*i; 125 sum1 = tmp[idt]; sum2 = tmp[1+idt]; 126 sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt]; 127 sum6 = tmp[5+idt];sum7 = tmp[6+idt]; 128 while (nz--) { 129 idx = 7*(*vi++); 130 x1 = tmp[idx]; x2 = tmp[1+idx]; 131 x3 = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx]; 132 x6 = tmp[5+idx]; x7 = tmp[6+idx]; 133 sum1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 134 sum2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 135 sum3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 136 sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 137 sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 138 sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 139 sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 140 v += 49; 141 } 142 idc = 7*(*c--); 143 v = aa + 49*diag[i]; 144 x[idc] = tmp[idt] = v[0]*sum1+v[7]*sum2+v[14]*sum3+ 145 v[21]*sum4+v[28]*sum5+v[35]*sum6+v[42]*sum7; 146 x[1+idc] = tmp[1+idt] = v[1]*sum1+v[8]*sum2+v[15]*sum3+ 147 v[22]*sum4+v[29]*sum5+v[36]*sum6+v[43]*sum7; 148 x[2+idc] = tmp[2+idt] = v[2]*sum1+v[9]*sum2+v[16]*sum3+ 149 v[23]*sum4+v[30]*sum5+v[37]*sum6+v[44]*sum7; 150 x[3+idc] = tmp[3+idt] = v[3]*sum1+v[10]*sum2+v[17]*sum3+ 151 v[24]*sum4+v[31]*sum5+v[38]*sum6+v[45]*sum7; 152 x[4+idc] = tmp[4+idt] = v[4]*sum1+v[11]*sum2+v[18]*sum3+ 153 v[25]*sum4+v[32]*sum5+v[39]*sum6+v[46]*sum7; 154 x[5+idc] = tmp[5+idt] = v[5]*sum1+v[12]*sum2+v[19]*sum3+ 155 v[26]*sum4+v[33]*sum5+v[40]*sum6+v[47]*sum7; 156 x[6+idc] = tmp[6+idt] = v[6]*sum1+v[13]*sum2+v[20]*sum3+ 157 v[27]*sum4+v[34]*sum5+v[41]*sum6+v[48]*sum7; 158 } 159 160 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 161 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 162 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 163 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 164 PLogFlops(2*49*(a->nz) - 7*a->n); 165 PetscFunctionReturn(0); 166 } 167 168 #undef __FUNC__ 169 #define __FUNC__ "MatSolve_SeqBAIJ_7_NaturalOrdering" 170 int MatSolve_SeqBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 171 { 172 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 173 int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 174 int ierr,*diag = a->diag,jdx; 175 MatScalar *aa=a->a,*v; 176 Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,sum7,x1,x2,x3,x4,x5,x6,x7; 177 178 PetscFunctionBegin; 179 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 180 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 181 /* forward solve the lower triangular */ 182 idx = 0; 183 x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; 184 x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx]; 185 x[6] = b[6+idx]; 186 for ( i=1; i<n; i++ ) { 187 v = aa + 49*ai[i]; 188 vi = aj + ai[i]; 189 nz = diag[i] - ai[i]; 190 idx = 7*i; 191 sum1 = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx]; 192 sum4 = b[3+idx]; sum5 = b[4+idx]; sum6 = b[5+idx]; 193 sum7 = b[6+idx]; 194 while (nz--) { 195 jdx = 7*(*vi++); 196 x1 = x[jdx]; x2 = x[1+jdx]; x3 = x[2+jdx]; 197 x4 = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx]; 198 x7 = x[6+jdx]; 199 sum1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 200 sum2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 201 sum3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 202 sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 203 sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 204 sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 205 sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 206 v += 49; 207 } 208 x[idx] = sum1; 209 x[1+idx] = sum2; 210 x[2+idx] = sum3; 211 x[3+idx] = sum4; 212 x[4+idx] = sum5; 213 x[5+idx] = sum6; 214 x[6+idx] = sum7; 215 } 216 /* backward solve the upper triangular */ 217 for ( i=n-1; i>=0; i-- ){ 218 v = aa + 49*diag[i] + 49; 219 vi = aj + diag[i] + 1; 220 nz = ai[i+1] - diag[i] - 1; 221 idt = 7*i; 222 sum1 = x[idt]; sum2 = x[1+idt]; 223 sum3 = x[2+idt]; sum4 = x[3+idt]; 224 sum5 = x[4+idt]; sum6 = x[5+idt]; 225 sum7 = x[6+idt]; 226 while (nz--) { 227 idx = 7*(*vi++); 228 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 229 x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 230 x7 = x[6+idx]; 231 sum1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 232 sum2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 233 sum3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 234 sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 235 sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 236 sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 237 sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 238 v += 49; 239 } 240 v = aa + 49*diag[i]; 241 x[idt] = v[0]*sum1 + v[7]*sum2 + v[14]*sum3 + v[21]*sum4 242 + v[28]*sum5 + v[35]*sum6 + v[42]*sum7; 243 x[1+idt] = v[1]*sum1 + v[8]*sum2 + v[15]*sum3 + v[22]*sum4 244 + v[29]*sum5 + v[36]*sum6 + v[43]*sum7; 245 x[2+idt] = v[2]*sum1 + v[9]*sum2 + v[16]*sum3 + v[23]*sum4 246 + v[30]*sum5 + v[37]*sum6 + v[44]*sum7; 247 x[3+idt] = v[3]*sum1 + v[10]*sum2 + v[17]*sum3 + v[24]*sum4 248 + v[31]*sum5 + v[38]*sum6 + v[45]*sum7; 249 x[4+idt] = v[4]*sum1 + v[11]*sum2 + v[18]*sum3 + v[25]*sum4 250 + v[32]*sum5 + v[39]*sum6 + v[46]*sum7; 251 x[5+idt] = v[5]*sum1 + v[12]*sum2 + v[19]*sum3 + v[26]*sum4 252 + v[33]*sum5 + v[40]*sum6 + v[47]*sum7; 253 x[6+idt] = v[6]*sum1 + v[13]*sum2 + v[20]*sum3 + v[27]*sum4 254 + v[34]*sum5 + v[41]*sum6 + v[48]*sum7; 255 } 256 257 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 258 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 259 PLogFlops(2*36*(a->nz) - 6*a->n); 260 PetscFunctionReturn(0); 261 } 262 263 #undef __FUNC__ 264 #define __FUNC__ "MatSolve_SeqBAIJ_6" 265 int MatSolve_SeqBAIJ_6(Mat A,Vec bb,Vec xx) 266 { 267 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 268 IS iscol=a->col,isrow=a->row; 269 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 270 int *diag = a->diag; 271 MatScalar *aa=a->a,*v; 272 Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,x1,x2,x3,x4,x5,x6,*tmp; 273 274 PetscFunctionBegin; 275 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 276 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 277 tmp = a->solve_work; 278 279 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 280 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 281 282 /* forward solve the lower triangular */ 283 idx = 6*(*r++); 284 tmp[0] = b[idx]; tmp[1] = b[1+idx]; 285 tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; 286 tmp[4] = b[4+idx]; tmp[5] = b[5+idx]; 287 for ( i=1; i<n; i++ ) { 288 v = aa + 36*ai[i]; 289 vi = aj + ai[i]; 290 nz = diag[i] - ai[i]; 291 idx = 6*(*r++); 292 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 293 sum5 = b[4+idx]; sum6 = b[5+idx]; 294 while (nz--) { 295 idx = 6*(*vi++); 296 x1 = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx]; 297 x4 = tmp[3+idx]; x5 = tmp[4+idx]; x6 = tmp[5+idx]; 298 sum1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 299 sum2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 300 sum3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 301 sum4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 302 sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 303 sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 304 v += 36; 305 } 306 idx = 6*i; 307 tmp[idx] = sum1;tmp[1+idx] = sum2; 308 tmp[2+idx] = sum3;tmp[3+idx] = sum4; 309 tmp[4+idx] = sum5;tmp[5+idx] = sum6; 310 } 311 /* backward solve the upper triangular */ 312 for ( i=n-1; i>=0; i-- ){ 313 v = aa + 36*diag[i] + 36; 314 vi = aj + diag[i] + 1; 315 nz = ai[i+1] - diag[i] - 1; 316 idt = 6*i; 317 sum1 = tmp[idt]; sum2 = tmp[1+idt]; 318 sum3 = tmp[2+idt];sum4 = tmp[3+idt]; 319 sum5 = tmp[4+idt];sum6 = tmp[5+idt]; 320 while (nz--) { 321 idx = 6*(*vi++); 322 x1 = tmp[idx]; x2 = tmp[1+idx]; 323 x3 = tmp[2+idx]; x4 = tmp[3+idx]; 324 x5 = tmp[4+idx]; x6 = tmp[5+idx]; 325 sum1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 326 sum2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 327 sum3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 328 sum4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 329 sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 330 sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 331 v += 36; 332 } 333 idc = 6*(*c--); 334 v = aa + 36*diag[i]; 335 x[idc] = tmp[idt] = v[0]*sum1+v[6]*sum2+v[12]*sum3+ 336 v[18]*sum4+v[24]*sum5+v[30]*sum6; 337 x[1+idc] = tmp[1+idt] = v[1]*sum1+v[7]*sum2+v[13]*sum3+ 338 v[19]*sum4+v[25]*sum5+v[31]*sum6; 339 x[2+idc] = tmp[2+idt] = v[2]*sum1+v[8]*sum2+v[14]*sum3+ 340 v[20]*sum4+v[26]*sum5+v[32]*sum6; 341 x[3+idc] = tmp[3+idt] = v[3]*sum1+v[9]*sum2+v[15]*sum3+ 342 v[21]*sum4+v[27]*sum5+v[33]*sum6; 343 x[4+idc] = tmp[4+idt] = v[4]*sum1+v[10]*sum2+v[16]*sum3+ 344 v[22]*sum4+v[28]*sum5+v[34]*sum6; 345 x[5+idc] = tmp[5+idt] = v[5]*sum1+v[11]*sum2+v[17]*sum3+ 346 v[23]*sum4+v[29]*sum5+v[35]*sum6; 347 } 348 349 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 350 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 351 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 352 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 353 PLogFlops(2*36*(a->nz) - 6*a->n); 354 PetscFunctionReturn(0); 355 } 356 357 #undef __FUNC__ 358 #define __FUNC__ "MatSolve_SeqBAIJ_6_NaturalOrdering" 359 int MatSolve_SeqBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 360 { 361 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 362 int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 363 int ierr,*diag = a->diag,jdx; 364 MatScalar *aa=a->a,*v; 365 Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,x1,x2,x3,x4,x5,x6; 366 367 PetscFunctionBegin; 368 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 369 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 370 /* forward solve the lower triangular */ 371 idx = 0; 372 x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; 373 x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx]; 374 for ( i=1; i<n; i++ ) { 375 v = aa + 36*ai[i]; 376 vi = aj + ai[i]; 377 nz = diag[i] - ai[i]; 378 idx = 6*i; 379 sum1 = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx]; 380 sum4 = b[3+idx]; sum5 = b[4+idx]; sum6 = b[5+idx]; 381 while (nz--) { 382 jdx = 6*(*vi++); 383 x1 = x[jdx]; x2 = x[1+jdx]; x3 = x[2+jdx]; 384 x4 = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx]; 385 sum1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 386 sum2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 387 sum3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 388 sum4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 389 sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 390 sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 391 v += 36; 392 } 393 x[idx] = sum1; 394 x[1+idx] = sum2; 395 x[2+idx] = sum3; 396 x[3+idx] = sum4; 397 x[4+idx] = sum5; 398 x[5+idx] = sum6; 399 } 400 /* backward solve the upper triangular */ 401 for ( i=n-1; i>=0; i-- ){ 402 v = aa + 36*diag[i] + 36; 403 vi = aj + diag[i] + 1; 404 nz = ai[i+1] - diag[i] - 1; 405 idt = 6*i; 406 sum1 = x[idt]; sum2 = x[1+idt]; 407 sum3 = x[2+idt]; sum4 = x[3+idt]; 408 sum5 = x[4+idt]; sum6 = x[5+idt]; 409 while (nz--) { 410 idx = 6*(*vi++); 411 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 412 x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 413 sum1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 414 sum2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 415 sum3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 416 sum4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 417 sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 418 sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 419 v += 36; 420 } 421 v = aa + 36*diag[i]; 422 x[idt] = v[0]*sum1 + v[6]*sum2 + v[12]*sum3 + v[18]*sum4 + v[24]*sum5 + v[30]*sum6; 423 x[1+idt] = v[1]*sum1 + v[7]*sum2 + v[13]*sum3 + v[19]*sum4 + v[25]*sum5 + v[31]*sum6; 424 x[2+idt] = v[2]*sum1 + v[8]*sum2 + v[14]*sum3 + v[20]*sum4 + v[26]*sum5 + v[32]*sum6; 425 x[3+idt] = v[3]*sum1 + v[9]*sum2 + v[15]*sum3 + v[21]*sum4 + v[27]*sum5 + v[33]*sum6; 426 x[4+idt] = v[4]*sum1 + v[10]*sum2 + v[16]*sum3 + v[22]*sum4 + v[28]*sum5 + v[34]*sum6; 427 x[5+idt] = v[5]*sum1 + v[11]*sum2 + v[17]*sum3 + v[23]*sum4 + v[29]*sum5 + v[35]*sum6; 428 } 429 430 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 431 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 432 PLogFlops(2*36*(a->nz) - 6*a->n); 433 PetscFunctionReturn(0); 434 } 435 436 #undef __FUNC__ 437 #define __FUNC__ "MatSolve_SeqBAIJ_5" 438 int MatSolve_SeqBAIJ_5(Mat A,Vec bb,Vec xx) 439 { 440 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 441 IS iscol=a->col,isrow=a->row; 442 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 443 int *diag = a->diag; 444 MatScalar *aa=a->a,*v; 445 Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*tmp; 446 447 PetscFunctionBegin; 448 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 449 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 450 tmp = a->solve_work; 451 452 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 453 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 454 455 /* forward solve the lower triangular */ 456 idx = 5*(*r++); 457 tmp[0] = b[idx]; tmp[1] = b[1+idx]; 458 tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx]; 459 for ( i=1; i<n; i++ ) { 460 v = aa + 25*ai[i]; 461 vi = aj + ai[i]; 462 nz = diag[i] - ai[i]; 463 idx = 5*(*r++); 464 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 465 sum5 = b[4+idx]; 466 while (nz--) { 467 idx = 5*(*vi++); 468 x1 = tmp[idx]; x2 = tmp[1+idx];x3 = tmp[2+idx]; 469 x4 = tmp[3+idx];x5 = tmp[4+idx]; 470 sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 471 sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 472 sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 473 sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 474 sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 475 v += 25; 476 } 477 idx = 5*i; 478 tmp[idx] = sum1;tmp[1+idx] = sum2; 479 tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5; 480 } 481 /* backward solve the upper triangular */ 482 for ( i=n-1; i>=0; i-- ){ 483 v = aa + 25*diag[i] + 25; 484 vi = aj + diag[i] + 1; 485 nz = ai[i+1] - diag[i] - 1; 486 idt = 5*i; 487 sum1 = tmp[idt]; sum2 = tmp[1+idt]; 488 sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt]; 489 while (nz--) { 490 idx = 5*(*vi++); 491 x1 = tmp[idx]; x2 = tmp[1+idx]; 492 x3 = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx]; 493 sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 494 sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 495 sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 496 sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 497 sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 498 v += 25; 499 } 500 idc = 5*(*c--); 501 v = aa + 25*diag[i]; 502 x[idc] = tmp[idt] = v[0]*sum1+v[5]*sum2+v[10]*sum3+ 503 v[15]*sum4+v[20]*sum5; 504 x[1+idc] = tmp[1+idt] = v[1]*sum1+v[6]*sum2+v[11]*sum3+ 505 v[16]*sum4+v[21]*sum5; 506 x[2+idc] = tmp[2+idt] = v[2]*sum1+v[7]*sum2+v[12]*sum3+ 507 v[17]*sum4+v[22]*sum5; 508 x[3+idc] = tmp[3+idt] = v[3]*sum1+v[8]*sum2+v[13]*sum3+ 509 v[18]*sum4+v[23]*sum5; 510 x[4+idc] = tmp[4+idt] = v[4]*sum1+v[9]*sum2+v[14]*sum3+ 511 v[19]*sum4+v[24]*sum5; 512 } 513 514 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 515 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 516 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 517 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 518 PLogFlops(2*25*(a->nz) - 5*a->n); 519 PetscFunctionReturn(0); 520 } 521 522 #undef __FUNC__ 523 #define __FUNC__ "MatSolve_SeqBAIJ_5_NaturalOrdering" 524 int MatSolve_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 525 { 526 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 527 int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 528 int ierr,*diag = a->diag,jdx; 529 MatScalar *aa=a->a,*v; 530 Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;; 531 532 PetscFunctionBegin; 533 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 534 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 535 /* forward solve the lower triangular */ 536 idx = 0; 537 x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; x[3] = b[3+idx];x[4] = b[4+idx]; 538 for ( i=1; i<n; i++ ) { 539 v = aa + 25*ai[i]; 540 vi = aj + ai[i]; 541 nz = diag[i] - ai[i]; 542 idx = 5*i; 543 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];sum5 = b[4+idx]; 544 while (nz--) { 545 jdx = 5*(*vi++); 546 x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];x5 = x[4+jdx]; 547 sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 548 sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 549 sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 550 sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 551 sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 552 v += 25; 553 } 554 x[idx] = sum1; 555 x[1+idx] = sum2; 556 x[2+idx] = sum3; 557 x[3+idx] = sum4; 558 x[4+idx] = sum5; 559 } 560 /* backward solve the upper triangular */ 561 for ( i=n-1; i>=0; i-- ){ 562 v = aa + 25*diag[i] + 25; 563 vi = aj + diag[i] + 1; 564 nz = ai[i+1] - diag[i] - 1; 565 idt = 5*i; 566 sum1 = x[idt]; sum2 = x[1+idt]; 567 sum3 = x[2+idt];sum4 = x[3+idt]; sum5 = x[4+idt]; 568 while (nz--) { 569 idx = 5*(*vi++); 570 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 571 sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 572 sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 573 sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 574 sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 575 sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 576 v += 25; 577 } 578 v = aa + 25*diag[i]; 579 x[idt] = v[0]*sum1 + v[5]*sum2 + v[10]*sum3 + v[15]*sum4 + v[20]*sum5; 580 x[1+idt] = v[1]*sum1 + v[6]*sum2 + v[11]*sum3 + v[16]*sum4 + v[21]*sum5; 581 x[2+idt] = v[2]*sum1 + v[7]*sum2 + v[12]*sum3 + v[17]*sum4 + v[22]*sum5; 582 x[3+idt] = v[3]*sum1 + v[8]*sum2 + v[13]*sum3 + v[18]*sum4 + v[23]*sum5; 583 x[4+idt] = v[4]*sum1 + v[9]*sum2 + v[14]*sum3 + v[19]*sum4 + v[24]*sum5; 584 } 585 586 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 587 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 588 PLogFlops(2*25*(a->nz) - 5*a->n); 589 PetscFunctionReturn(0); 590 } 591 592 #undef __FUNC__ 593 #define __FUNC__ "MatSolve_SeqBAIJ_4" 594 int MatSolve_SeqBAIJ_4(Mat A,Vec bb,Vec xx) 595 { 596 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 597 IS iscol=a->col,isrow=a->row; 598 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 599 int *diag = a->diag; 600 MatScalar *aa=a->a,*v; 601 Scalar *x,*b,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*tmp; 602 603 PetscFunctionBegin; 604 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 605 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 606 tmp = a->solve_work; 607 608 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 609 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 610 611 /* forward solve the lower triangular */ 612 idx = 4*(*r++); 613 tmp[0] = b[idx]; tmp[1] = b[1+idx]; 614 tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; 615 for ( i=1; i<n; i++ ) { 616 v = aa + 16*ai[i]; 617 vi = aj + ai[i]; 618 nz = diag[i] - ai[i]; 619 idx = 4*(*r++); 620 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 621 while (nz--) { 622 idx = 4*(*vi++); 623 x1 = tmp[idx];x2 = tmp[1+idx];x3 = tmp[2+idx];x4 = tmp[3+idx]; 624 sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 625 sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 626 sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 627 sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 628 v += 16; 629 } 630 idx = 4*i; 631 tmp[idx] = sum1;tmp[1+idx] = sum2; 632 tmp[2+idx] = sum3;tmp[3+idx] = sum4; 633 } 634 /* backward solve the upper triangular */ 635 for ( i=n-1; i>=0; i-- ){ 636 v = aa + 16*diag[i] + 16; 637 vi = aj + diag[i] + 1; 638 nz = ai[i+1] - diag[i] - 1; 639 idt = 4*i; 640 sum1 = tmp[idt]; sum2 = tmp[1+idt]; 641 sum3 = tmp[2+idt];sum4 = tmp[3+idt]; 642 while (nz--) { 643 idx = 4*(*vi++); 644 x1 = tmp[idx]; x2 = tmp[1+idx]; 645 x3 = tmp[2+idx]; x4 = tmp[3+idx]; 646 sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 647 sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 648 sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 649 sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 650 v += 16; 651 } 652 idc = 4*(*c--); 653 v = aa + 16*diag[i]; 654 x[idc] = tmp[idt] = v[0]*sum1+v[4]*sum2+v[8]*sum3+v[12]*sum4; 655 x[1+idc] = tmp[1+idt] = v[1]*sum1+v[5]*sum2+v[9]*sum3+v[13]*sum4; 656 x[2+idc] = tmp[2+idt] = v[2]*sum1+v[6]*sum2+v[10]*sum3+v[14]*sum4; 657 x[3+idc] = tmp[3+idt] = v[3]*sum1+v[7]*sum2+v[11]*sum3+v[15]*sum4; 658 } 659 660 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 661 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 662 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 663 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 664 PLogFlops(2*16*(a->nz) - 4*a->n); 665 PetscFunctionReturn(0); 666 } 667 668 669 /* 670 Special case where the matrix was ILU(0) factored in the natural 671 ordering. This eliminates the need for the column and row permutation. 672 */ 673 #undef __FUNC__ 674 #define __FUNC__ "MatSolve_SeqBAIJ_4_NaturalOrdering" 675 int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 676 { 677 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 678 int n=a->mbs,*ai=a->i,*aj=a->j; 679 int ierr,*diag = a->diag; 680 MatScalar *aa=a->a; 681 Scalar *x,*b; 682 683 PetscFunctionBegin; 684 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 685 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 686 687 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS) 688 { 689 static Scalar w[2000]; /* very BAD need to fix */ 690 fortransolvebaij4blas_(&n,x,ai,aj,diag,aa,b,w); 691 } 692 #elif defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJ) 693 { 694 static Scalar w[2000]; /* very BAD need to fix */ 695 fortransolvebaij4_(&n,x,ai,aj,diag,aa,b,w); 696 } 697 #elif defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL) 698 fortransolvebaij4unroll_(&n,x,ai,aj,diag,aa,b); 699 #else 700 { 701 Scalar sum1,sum2,sum3,sum4,x1,x2,x3,x4; 702 MatScalar *v; 703 int jdx,idt,idx,nz,*vi,i; 704 705 /* forward solve the lower triangular */ 706 idx = 0; 707 x[0] = b[0]; x[1] = b[1]; x[2] = b[2]; x[3] = b[3]; 708 for ( i=1; i<n; i++ ) { 709 v = aa + 16*ai[i]; 710 vi = aj + ai[i]; 711 nz = diag[i] - ai[i]; 712 idx += 4; 713 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 714 while (nz--) { 715 jdx = 4*(*vi++); 716 x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx]; 717 sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 718 sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 719 sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 720 sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 721 v += 16; 722 } 723 x[idx] = sum1; 724 x[1+idx] = sum2; 725 x[2+idx] = sum3; 726 x[3+idx] = sum4; 727 } 728 /* backward solve the upper triangular */ 729 for ( i=n-1; i>=0; i-- ){ 730 v = aa + 16*diag[i] + 16; 731 vi = aj + diag[i] + 1; 732 nz = ai[i+1] - diag[i] - 1; 733 idt = 4*i; 734 sum1 = x[idt]; sum2 = x[1+idt]; 735 sum3 = x[2+idt];sum4 = x[3+idt]; 736 while (nz--) { 737 idx = 4*(*vi++); 738 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; x4 = x[3+idx]; 739 sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 740 sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 741 sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 742 sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 743 v += 16; 744 } 745 v = aa + 16*diag[i]; 746 x[idt] = v[0]*sum1 + v[4]*sum2 + v[8]*sum3 + v[12]*sum4; 747 x[1+idt] = v[1]*sum1 + v[5]*sum2 + v[9]*sum3 + v[13]*sum4; 748 x[2+idt] = v[2]*sum1 + v[6]*sum2 + v[10]*sum3 + v[14]*sum4; 749 x[3+idt] = v[3]*sum1 + v[7]*sum2 + v[11]*sum3 + v[15]*sum4; 750 } 751 } 752 #endif 753 754 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 755 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 756 PLogFlops(2*16*(a->nz) - 4*a->n); 757 PetscFunctionReturn(0); 758 } 759 760 #undef __FUNC__ 761 #define __FUNC__ "MatSolve_SeqBAIJ_3" 762 int MatSolve_SeqBAIJ_3(Mat A,Vec bb,Vec xx) 763 { 764 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 765 IS iscol=a->col,isrow=a->row; 766 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 767 int *diag = a->diag; 768 MatScalar *aa=a->a,*v; 769 Scalar *x,*b,sum1,sum2,sum3,x1,x2,x3,*tmp; 770 771 PetscFunctionBegin; 772 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 773 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 774 tmp = a->solve_work; 775 776 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 777 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 778 779 /* forward solve the lower triangular */ 780 idx = 3*(*r++); 781 tmp[0] = b[idx]; tmp[1] = b[1+idx]; tmp[2] = b[2+idx]; 782 for ( i=1; i<n; i++ ) { 783 v = aa + 9*ai[i]; 784 vi = aj + ai[i]; 785 nz = diag[i] - ai[i]; 786 idx = 3*(*r++); 787 sum1 = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx]; 788 while (nz--) { 789 idx = 3*(*vi++); 790 x1 = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx]; 791 sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 792 sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 793 sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 794 v += 9; 795 } 796 idx = 3*i; 797 tmp[idx] = sum1; tmp[1+idx] = sum2; tmp[2+idx] = sum3; 798 } 799 /* backward solve the upper triangular */ 800 for ( i=n-1; i>=0; i-- ){ 801 v = aa + 9*diag[i] + 9; 802 vi = aj + diag[i] + 1; 803 nz = ai[i+1] - diag[i] - 1; 804 idt = 3*i; 805 sum1 = tmp[idt]; sum2 = tmp[1+idt]; sum3 = tmp[2+idt]; 806 while (nz--) { 807 idx = 3*(*vi++); 808 x1 = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx]; 809 sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 810 sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 811 sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 812 v += 9; 813 } 814 idc = 3*(*c--); 815 v = aa + 9*diag[i]; 816 x[idc] = tmp[idt] = v[0]*sum1 + v[3]*sum2 + v[6]*sum3; 817 x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[4]*sum2 + v[7]*sum3; 818 x[2+idc] = tmp[2+idt] = v[2]*sum1 + v[5]*sum2 + v[8]*sum3; 819 } 820 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 821 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 822 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 823 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 824 PLogFlops(2*9*(a->nz) - 3*a->n); 825 PetscFunctionReturn(0); 826 } 827 828 /* 829 Special case where the matrix was ILU(0) factored in the natural 830 ordering. This eliminates the need for the column and row permutation. 831 */ 832 #undef __FUNC__ 833 #define __FUNC__ "MatSolve_SeqBAIJ_3_NaturalOrdering" 834 int MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 835 { 836 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 837 int n=a->mbs,*ai=a->i,*aj=a->j; 838 int ierr,*diag = a->diag; 839 MatScalar *aa=a->a, *v; 840 Scalar *x,*b,sum1,sum2,sum3,x1,x2,x3; 841 int jdx,idt,idx,nz,*vi,i; 842 843 PetscFunctionBegin; 844 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 845 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 846 847 848 /* forward solve the lower triangular */ 849 idx = 0; 850 x[0] = b[0]; x[1] = b[1]; x[2] = b[2]; 851 for ( i=1; i<n; i++ ) { 852 v = aa + 9*ai[i]; 853 vi = aj + ai[i]; 854 nz = diag[i] - ai[i]; 855 idx += 3; 856 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx]; 857 while (nz--) { 858 jdx = 3*(*vi++); 859 x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx]; 860 sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 861 sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 862 sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 863 v += 9; 864 } 865 x[idx] = sum1; 866 x[1+idx] = sum2; 867 x[2+idx] = sum3; 868 } 869 /* backward solve the upper triangular */ 870 for ( i=n-1; i>=0; i-- ){ 871 v = aa + 9*diag[i] + 9; 872 vi = aj + diag[i] + 1; 873 nz = ai[i+1] - diag[i] - 1; 874 idt = 3*i; 875 sum1 = x[idt]; sum2 = x[1+idt]; 876 sum3 = x[2+idt]; 877 while (nz--) { 878 idx = 3*(*vi++); 879 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 880 sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 881 sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 882 sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 883 v += 9; 884 } 885 v = aa + 9*diag[i]; 886 x[idt] = v[0]*sum1 + v[3]*sum2 + v[6]*sum3; 887 x[1+idt] = v[1]*sum1 + v[4]*sum2 + v[7]*sum3; 888 x[2+idt] = v[2]*sum1 + v[5]*sum2 + v[8]*sum3; 889 } 890 891 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 892 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 893 PLogFlops(2*9*(a->nz) - 3*a->n); 894 PetscFunctionReturn(0); 895 } 896 897 #undef __FUNC__ 898 #define __FUNC__ "MatSolve_SeqBAIJ_2" 899 int MatSolve_SeqBAIJ_2(Mat A,Vec bb,Vec xx) 900 { 901 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 902 IS iscol=a->col,isrow=a->row; 903 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 904 int *diag = a->diag; 905 MatScalar *aa=a->a,*v; 906 Scalar *x,*b,sum1,sum2,x1,x2,*tmp; 907 908 PetscFunctionBegin; 909 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 910 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 911 tmp = a->solve_work; 912 913 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 914 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 915 916 /* forward solve the lower triangular */ 917 idx = 2*(*r++); 918 tmp[0] = b[idx]; tmp[1] = b[1+idx]; 919 for ( i=1; i<n; i++ ) { 920 v = aa + 4*ai[i]; 921 vi = aj + ai[i]; 922 nz = diag[i] - ai[i]; 923 idx = 2*(*r++); 924 sum1 = b[idx]; sum2 = b[1+idx]; 925 while (nz--) { 926 idx = 2*(*vi++); 927 x1 = tmp[idx]; x2 = tmp[1+idx]; 928 sum1 -= v[0]*x1 + v[2]*x2; 929 sum2 -= v[1]*x1 + v[3]*x2; 930 v += 4; 931 } 932 idx = 2*i; 933 tmp[idx] = sum1; tmp[1+idx] = sum2; 934 } 935 /* backward solve the upper triangular */ 936 for ( i=n-1; i>=0; i-- ){ 937 v = aa + 4*diag[i] + 4; 938 vi = aj + diag[i] + 1; 939 nz = ai[i+1] - diag[i] - 1; 940 idt = 2*i; 941 sum1 = tmp[idt]; sum2 = tmp[1+idt]; 942 while (nz--) { 943 idx = 2*(*vi++); 944 x1 = tmp[idx]; x2 = tmp[1+idx]; 945 sum1 -= v[0]*x1 + v[2]*x2; 946 sum2 -= v[1]*x1 + v[3]*x2; 947 v += 4; 948 } 949 idc = 2*(*c--); 950 v = aa + 4*diag[i]; 951 x[idc] = tmp[idt] = v[0]*sum1 + v[2]*sum2; 952 x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[3]*sum2; 953 } 954 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 955 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 956 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 957 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 958 PLogFlops(2*4*(a->nz) - 2*a->n); 959 PetscFunctionReturn(0); 960 } 961 962 /* 963 Special case where the matrix was ILU(0) factored in the natural 964 ordering. This eliminates the need for the column and row permutation. 965 */ 966 #undef __FUNC__ 967 #define __FUNC__ "MatSolve_SeqBAIJ_2_NaturalOrdering" 968 int MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 969 { 970 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 971 int n=a->mbs,*ai=a->i,*aj=a->j; 972 int ierr,*diag = a->diag; 973 MatScalar *aa=a->a,*v; 974 Scalar *x,*b,sum1,sum2,x1,x2; 975 int jdx,idt,idx,nz,*vi,i; 976 977 PetscFunctionBegin; 978 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 979 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 980 981 /* forward solve the lower triangular */ 982 idx = 0; 983 x[0] = b[0]; x[1] = b[1]; 984 for ( i=1; i<n; i++ ) { 985 v = aa + 4*ai[i]; 986 vi = aj + ai[i]; 987 nz = diag[i] - ai[i]; 988 idx += 2; 989 sum1 = b[idx];sum2 = b[1+idx]; 990 while (nz--) { 991 jdx = 2*(*vi++); 992 x1 = x[jdx];x2 = x[1+jdx]; 993 sum1 -= v[0]*x1 + v[2]*x2; 994 sum2 -= v[1]*x1 + v[3]*x2; 995 v += 4; 996 } 997 x[idx] = sum1; 998 x[1+idx] = sum2; 999 } 1000 /* backward solve the upper triangular */ 1001 for ( i=n-1; i>=0; i-- ){ 1002 v = aa + 4*diag[i] + 4; 1003 vi = aj + diag[i] + 1; 1004 nz = ai[i+1] - diag[i] - 1; 1005 idt = 2*i; 1006 sum1 = x[idt]; sum2 = x[1+idt]; 1007 while (nz--) { 1008 idx = 2*(*vi++); 1009 x1 = x[idx]; x2 = x[1+idx]; 1010 sum1 -= v[0]*x1 + v[2]*x2; 1011 sum2 -= v[1]*x1 + v[3]*x2; 1012 v += 4; 1013 } 1014 v = aa + 4*diag[i]; 1015 x[idt] = v[0]*sum1 + v[2]*sum2; 1016 x[1+idt] = v[1]*sum1 + v[3]*sum2; 1017 } 1018 1019 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1020 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1021 PLogFlops(2*4*(a->nz) - 2*a->n); 1022 PetscFunctionReturn(0); 1023 } 1024 1025 #undef __FUNC__ 1026 #define __FUNC__ "MatSolve_SeqBAIJ_1" 1027 int MatSolve_SeqBAIJ_1(Mat A,Vec bb,Vec xx) 1028 { 1029 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1030 IS iscol=a->col,isrow=a->row; 1031 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,*rout,*cout; 1032 int *diag = a->diag; 1033 MatScalar *aa=a->a,*v; 1034 Scalar *x,*b,sum1,*tmp; 1035 1036 PetscFunctionBegin; 1037 if (!n) PetscFunctionReturn(0); 1038 1039 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1040 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1041 tmp = a->solve_work; 1042 1043 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1044 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1045 1046 /* forward solve the lower triangular */ 1047 tmp[0] = b[*r++]; 1048 for ( i=1; i<n; i++ ) { 1049 v = aa + ai[i]; 1050 vi = aj + ai[i]; 1051 nz = diag[i] - ai[i]; 1052 sum1 = b[*r++]; 1053 while (nz--) { 1054 sum1 -= (*v++)*tmp[*vi++]; 1055 } 1056 tmp[i] = sum1; 1057 } 1058 /* backward solve the upper triangular */ 1059 for ( i=n-1; i>=0; i-- ){ 1060 v = aa + diag[i] + 1; 1061 vi = aj + diag[i] + 1; 1062 nz = ai[i+1] - diag[i] - 1; 1063 sum1 = tmp[i]; 1064 while (nz--) { 1065 sum1 -= (*v++)*tmp[*vi++]; 1066 } 1067 x[*c--] = tmp[i] = aa[diag[i]]*sum1; 1068 } 1069 1070 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1071 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1072 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1073 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1074 PLogFlops(2*1*(a->nz) - a->n); 1075 PetscFunctionReturn(0); 1076 } 1077 /* 1078 Special case where the matrix was ILU(0) factored in the natural 1079 ordering. This eliminates the need for the column and row permutation. 1080 */ 1081 #undef __FUNC__ 1082 #define __FUNC__ "MatSolve_SeqBAIJ_1_NaturalOrdering" 1083 int MatSolve_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1084 { 1085 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1086 int n=a->mbs,*ai=a->i,*aj=a->j; 1087 int ierr,*diag = a->diag; 1088 MatScalar *aa=a->a; 1089 Scalar *x,*b; 1090 Scalar sum1,x1; 1091 MatScalar *v; 1092 int jdx,idt,idx,nz,*vi,i; 1093 1094 PetscFunctionBegin; 1095 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1096 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1097 1098 /* forward solve the lower triangular */ 1099 idx = 0; 1100 x[0] = b[0]; 1101 for ( i=1; i<n; i++ ) { 1102 v = aa + ai[i]; 1103 vi = aj + ai[i]; 1104 nz = diag[i] - ai[i]; 1105 idx += 1; 1106 sum1 = b[idx]; 1107 while (nz--) { 1108 jdx = *vi++; 1109 x1 = x[jdx]; 1110 sum1 -= v[0]*x1; 1111 v += 1; 1112 } 1113 x[idx] = sum1; 1114 } 1115 /* backward solve the upper triangular */ 1116 for ( i=n-1; i>=0; i-- ){ 1117 v = aa + diag[i] + 1; 1118 vi = aj + diag[i] + 1; 1119 nz = ai[i+1] - diag[i] - 1; 1120 idt = i; 1121 sum1 = x[idt]; 1122 while (nz--) { 1123 idx = *vi++; 1124 x1 = x[idx]; 1125 sum1 -= v[0]*x1; 1126 v += 1; 1127 } 1128 v = aa + diag[i]; 1129 x[idt] = v[0]*sum1; 1130 } 1131 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1132 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1133 PLogFlops(2*(a->nz) - a->n); 1134 PetscFunctionReturn(0); 1135 } 1136 1137 /* ----------------------------------------------------------------*/ 1138 /* 1139 This code is virtually identical to MatILUFactorSymbolic_SeqAIJ 1140 except that the data structure of Mat_SeqAIJ is slightly different. 1141 Not a good example of code reuse. 1142 */ 1143 extern int MatMissingDiag_SeqBAIJ(Mat); 1144 1145 #undef __FUNC__ 1146 #define __FUNC__ "MatILUFactorSymbolic_SeqBAIJ" 1147 int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 1148 { 1149 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b; 1150 IS isicol; 1151 int *r,*ic, ierr, prow, n = a->mbs, *ai = a->i, *aj = a->j; 1152 int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 1153 int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0, dcount = 0; 1154 int incrlev,nnz,i,bs = a->bs,bs2 = a->bs2, levels, diagonal_fill; 1155 PetscTruth col_identity, row_identity; 1156 double f; 1157 1158 PetscFunctionBegin; 1159 if (info) { 1160 f = info->fill; 1161 levels = (int) info->levels; 1162 diagonal_fill = (int) info->diagonal_fill; 1163 } else { 1164 f = 1.0; 1165 levels = 0; 1166 diagonal_fill = 0; 1167 } 1168 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 1169 1170 /* special case that simply copies fill pattern */ 1171 PetscValidHeaderSpecific(isrow,IS_COOKIE); 1172 PetscValidHeaderSpecific(iscol,IS_COOKIE); 1173 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1174 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1175 if (levels == 0 && row_identity && col_identity) { 1176 ierr = MatDuplicate_SeqBAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 1177 (*fact)->factor = FACTOR_LU; 1178 b = (Mat_SeqBAIJ *) (*fact)->data; 1179 if (!b->diag) { 1180 ierr = MatMarkDiag_SeqBAIJ(*fact);CHKERRQ(ierr); 1181 } 1182 ierr = MatMissingDiag_SeqBAIJ(*fact);CHKERRQ(ierr); 1183 b->row = isrow; 1184 b->col = iscol; 1185 b->icol = isicol; 1186 b->solve_work = (Scalar *) PetscMalloc((b->m+1+b->bs)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1187 /* 1188 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1189 for ILU(0) factorization with natural ordering 1190 */ 1191 switch (b->bs) { 1192 case 2: 1193 (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1194 (*fact)->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1195 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1196 break; 1197 case 3: 1198 (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1199 (*fact)->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1200 PLogInfo(A,"UMatILUFactorSymbolic_SeqBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 1201 break; 1202 case 4: 1203 (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1204 (*fact)->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1205 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1206 break; 1207 case 5: 1208 (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1209 (*fact)->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1210 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1211 break; 1212 case 6: 1213 (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1214 (*fact)->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1215 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1216 break; 1217 case 7: 1218 (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1219 (*fact)->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1220 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1221 break; 1222 } 1223 PetscFunctionReturn(0); 1224 } 1225 1226 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1227 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1228 1229 /* get new row pointers */ 1230 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 1231 ainew[0] = 0; 1232 /* don't know how many column pointers are needed so estimate */ 1233 jmax = (int) (f*ai[n] + 1); 1234 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 1235 /* ajfill is level of fill for each fill entry */ 1236 ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill); 1237 /* fill is a linked list of nonzeros in active row */ 1238 fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill); 1239 /* im is level for each filled value */ 1240 im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im); 1241 /* dloc is location of diagonal in factor */ 1242 dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc); 1243 dloc[0] = 0; 1244 for ( prow=0; prow<n; prow++ ) { 1245 1246 /* copy prow into linked list */ 1247 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 1248 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 1249 xi = aj + ai[r[prow]]; 1250 fill[n] = n; 1251 fill[prow] = -1; /* marker for diagonal entry */ 1252 while (nz--) { 1253 fm = n; 1254 idx = ic[*xi++]; 1255 do { 1256 m = fm; 1257 fm = fill[m]; 1258 } while (fm < idx); 1259 fill[m] = idx; 1260 fill[idx] = fm; 1261 im[idx] = 0; 1262 } 1263 1264 /* make sure diagonal entry is included */ 1265 if (diagonal_fill && fill[prow] == -1) { 1266 fm = n; 1267 while (fill[fm] < prow) fm = fill[fm]; 1268 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 1269 fill[fm] = prow; 1270 im[prow] = 0; 1271 nzf++; 1272 dcount++; 1273 } 1274 1275 nzi = 0; 1276 row = fill[n]; 1277 while ( row < prow ) { 1278 incrlev = im[row] + 1; 1279 nz = dloc[row]; 1280 xi = ajnew + ainew[row] + nz + 1; 1281 flev = ajfill + ainew[row] + nz + 1; 1282 nnz = ainew[row+1] - ainew[row] - nz - 1; 1283 fm = row; 1284 while (nnz-- > 0) { 1285 idx = *xi++; 1286 if (*flev + incrlev > levels) { 1287 flev++; 1288 continue; 1289 } 1290 do { 1291 m = fm; 1292 fm = fill[m]; 1293 } while (fm < idx); 1294 if (fm != idx) { 1295 im[idx] = *flev + incrlev; 1296 fill[m] = idx; 1297 fill[idx] = fm; 1298 fm = idx; 1299 nzf++; 1300 } else { 1301 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 1302 } 1303 flev++; 1304 } 1305 row = fill[row]; 1306 nzi++; 1307 } 1308 /* copy new filled row into permanent storage */ 1309 ainew[prow+1] = ainew[prow] + nzf; 1310 if (ainew[prow+1] > jmax) { 1311 1312 /* estimate how much additional space we will need */ 1313 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1314 /* just double the memory each time */ 1315 int maxadd = jmax; 1316 /* maxadd = (int) (((f*ai[n]+1)*(n-prow+5))/n); */ 1317 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1318 jmax += maxadd; 1319 1320 /* allocate a longer ajnew and ajfill */ 1321 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1322 ierr = PetscMemcpy(xi,ajnew,ainew[prow]*sizeof(int));CHKERRQ(ierr); 1323 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1324 ajnew = xi; 1325 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1326 ierr = PetscMemcpy(xi,ajfill,ainew[prow]*sizeof(int));CHKERRQ(ierr); 1327 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1328 ajfill = xi; 1329 realloc++; /* count how many reallocations are needed */ 1330 } 1331 xi = ajnew + ainew[prow]; 1332 flev = ajfill + ainew[prow]; 1333 dloc[prow] = nzi; 1334 fm = fill[n]; 1335 while (nzf--) { 1336 *xi++ = fm; 1337 *flev++ = im[fm]; 1338 fm = fill[fm]; 1339 } 1340 /* make sure row has diagonal entry */ 1341 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1342 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 1343 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1344 } 1345 } 1346 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1347 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1348 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1349 ierr = PetscFree(fill);CHKERRQ(ierr); 1350 ierr = PetscFree(im);CHKERRQ(ierr); 1351 1352 { 1353 double af = ((double)ainew[n])/((double)ai[n]); 1354 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n", 1355 realloc,f,af); 1356 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Run with -pc_ilu_fill %g or use \n",af); 1357 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:PCILUSetFill(pc,%g);\n",af); 1358 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:for best performance.\n"); 1359 if (diagonal_fill) { 1360 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Detected and replace %d missing diagonals",dcount); 1361 } 1362 } 1363 1364 /* put together the new matrix */ 1365 ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1366 PLogObjectParent(*fact,isicol); 1367 b = (Mat_SeqBAIJ *) (*fact)->data; 1368 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1369 b->singlemalloc = 0; 1370 len = bs2*ainew[n]*sizeof(MatScalar); 1371 /* the next line frees the default space generated by the Create() */ 1372 ierr = PetscFree(b->a);CHKERRQ(ierr); 1373 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1374 b->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a); 1375 b->j = ajnew; 1376 b->i = ainew; 1377 for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 1378 b->diag = dloc; 1379 b->ilen = 0; 1380 b->imax = 0; 1381 b->row = isrow; 1382 b->col = iscol; 1383 b->icol = isicol; 1384 b->solve_work = (Scalar *) PetscMalloc( (bs*n+bs)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1385 /* In b structure: Free imax, ilen, old a, old j. 1386 Allocate dloc, solve_work, new a, new j */ 1387 PLogObjectMemory(*fact,(ainew[n]-n)*(sizeof(int))+bs2*ainew[n]*sizeof(Scalar)); 1388 b->maxnz = b->nz = ainew[n]; 1389 (*fact)->factor = FACTOR_LU; 1390 1391 (*fact)->info.factor_mallocs = realloc; 1392 (*fact)->info.fill_ratio_given = f; 1393 (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]); 1394 1395 PetscFunctionReturn(0); 1396 } 1397 1398 1399 1400 1401