1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: baijfact2.c,v 1.28 1999/05/12 03:29:44 bsmith Exp balay $"; 3 #endif 4 /* 5 Factorization code for BAIJ format. 6 */ 7 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/ilu.h" 11 #include "src/inline/dot.h" 12 13 /* ----------------------------------------------------------- */ 14 #undef __FUNC__ 15 #define __FUNC__ "MatSolve_SeqBAIJ_N" 16 int MatSolve_SeqBAIJ_N(Mat A,Vec bb,Vec xx) 17 { 18 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 19 IS iscol=a->col,isrow=a->row; 20 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j; 21 int nz,bs=a->bs,bs2=a->bs2,*rout,*cout; 22 MatScalar *aa=a->a,*v; 23 Scalar *x,*b,*sum,*tmp,*lsum; 24 25 PetscFunctionBegin; 26 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 27 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 28 tmp = a->solve_work; 29 30 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 31 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 32 33 /* forward solve the lower triangular */ 34 ierr = PetscMemcpy(tmp,b+bs*(*r++),bs*sizeof(Scalar));CHKERRQ(ierr); 35 for ( i=1; i<n; i++ ) { 36 v = aa + bs2*ai[i]; 37 vi = aj + ai[i]; 38 nz = a->diag[i] - ai[i]; 39 sum = tmp + bs*i; 40 ierr = PetscMemcpy(sum,b+bs*(*r++),bs*sizeof(Scalar));CHKERRQ(ierr); 41 while (nz--) { 42 Kernel_v_gets_v_minus_A_times_w(bs,sum,v,tmp+bs*(*vi++)); 43 v += bs2; 44 } 45 } 46 /* backward solve the upper triangular */ 47 lsum = a->solve_work + a->n; 48 for ( i=n-1; i>=0; i-- ){ 49 v = aa + bs2*(a->diag[i] + 1); 50 vi = aj + a->diag[i] + 1; 51 nz = ai[i+1] - a->diag[i] - 1; 52 ierr = PetscMemcpy(lsum,tmp+i*bs,bs*sizeof(Scalar));CHKERRQ(ierr); 53 while (nz--) { 54 Kernel_v_gets_v_minus_A_times_w(bs,lsum,v,tmp+bs*(*vi++)); 55 v += bs2; 56 } 57 Kernel_w_gets_A_times_v(bs,lsum,aa+bs2*a->diag[i],tmp+i*bs); 58 ierr = PetscMemcpy(x + bs*(*c--),tmp+i*bs,bs*sizeof(Scalar));CHKERRQ(ierr); 59 } 60 61 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 62 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 63 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 64 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 65 PLogFlops(2*(a->bs2)*(a->nz) - a->bs*a->n); 66 PetscFunctionReturn(0); 67 } 68 69 #undef __FUNC__ 70 #define __FUNC__ "MatSolve_SeqBAIJ_7" 71 int MatSolve_SeqBAIJ_7(Mat A,Vec bb,Vec xx) 72 { 73 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 74 IS iscol=a->col,isrow=a->row; 75 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 76 int *diag = a->diag; 77 MatScalar *aa=a->a,*v; 78 Scalar sum1,sum2,sum3,sum4,sum5,sum6,sum7,x1,x2,x3,x4,x5,x6,x7; 79 Scalar *x,*b,*tmp; 80 81 PetscFunctionBegin; 82 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 83 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 84 tmp = a->solve_work; 85 86 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 87 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 88 89 /* forward solve the lower triangular */ 90 idx = 7*(*r++); 91 tmp[0] = b[idx]; tmp[1] = b[1+idx]; 92 tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx]; 93 tmp[5] = b[5+idx]; tmp[6] = b[6+idx]; 94 95 for ( i=1; i<n; i++ ) { 96 v = aa + 49*ai[i]; 97 vi = aj + ai[i]; 98 nz = diag[i] - ai[i]; 99 idx = 7*(*r++); 100 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 101 sum5 = b[4+idx];sum6 = b[5+idx];sum7 = b[6+idx]; 102 while (nz--) { 103 idx = 7*(*vi++); 104 x1 = tmp[idx]; x2 = tmp[1+idx];x3 = tmp[2+idx]; 105 x4 = tmp[3+idx];x5 = tmp[4+idx]; 106 x6 = tmp[5+idx];x7 = tmp[6+idx]; 107 sum1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 108 sum2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 109 sum3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 110 sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 111 sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 112 sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 113 sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 114 v += 49; 115 } 116 idx = 7*i; 117 tmp[idx] = sum1;tmp[1+idx] = sum2; 118 tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5; 119 tmp[5+idx] = sum6;tmp[6+idx] = sum7; 120 } 121 /* backward solve the upper triangular */ 122 for ( i=n-1; i>=0; i-- ){ 123 v = aa + 49*diag[i] + 49; 124 vi = aj + diag[i] + 1; 125 nz = ai[i+1] - diag[i] - 1; 126 idt = 7*i; 127 sum1 = tmp[idt]; sum2 = tmp[1+idt]; 128 sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt]; 129 sum6 = tmp[5+idt];sum7 = tmp[6+idt]; 130 while (nz--) { 131 idx = 7*(*vi++); 132 x1 = tmp[idx]; x2 = tmp[1+idx]; 133 x3 = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx]; 134 x6 = tmp[5+idx]; x7 = tmp[6+idx]; 135 sum1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 136 sum2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 137 sum3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 138 sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 139 sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 140 sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 141 sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 142 v += 49; 143 } 144 idc = 7*(*c--); 145 v = aa + 49*diag[i]; 146 x[idc] = tmp[idt] = v[0]*sum1+v[7]*sum2+v[14]*sum3+ 147 v[21]*sum4+v[28]*sum5+v[35]*sum6+v[42]*sum7; 148 x[1+idc] = tmp[1+idt] = v[1]*sum1+v[8]*sum2+v[15]*sum3+ 149 v[22]*sum4+v[29]*sum5+v[36]*sum6+v[43]*sum7; 150 x[2+idc] = tmp[2+idt] = v[2]*sum1+v[9]*sum2+v[16]*sum3+ 151 v[23]*sum4+v[30]*sum5+v[37]*sum6+v[44]*sum7; 152 x[3+idc] = tmp[3+idt] = v[3]*sum1+v[10]*sum2+v[17]*sum3+ 153 v[24]*sum4+v[31]*sum5+v[38]*sum6+v[45]*sum7; 154 x[4+idc] = tmp[4+idt] = v[4]*sum1+v[11]*sum2+v[18]*sum3+ 155 v[25]*sum4+v[32]*sum5+v[39]*sum6+v[46]*sum7; 156 x[5+idc] = tmp[5+idt] = v[5]*sum1+v[12]*sum2+v[19]*sum3+ 157 v[26]*sum4+v[33]*sum5+v[40]*sum6+v[47]*sum7; 158 x[6+idc] = tmp[6+idt] = v[6]*sum1+v[13]*sum2+v[20]*sum3+ 159 v[27]*sum4+v[34]*sum5+v[41]*sum6+v[48]*sum7; 160 } 161 162 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 163 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 164 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 165 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 166 PLogFlops(2*49*(a->nz) - 7*a->n); 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNC__ 171 #define __FUNC__ "MatSolve_SeqBAIJ_7_NaturalOrdering" 172 int MatSolve_SeqBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 173 { 174 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 175 int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 176 int ierr,*diag = a->diag,jdx; 177 MatScalar *aa=a->a,*v; 178 Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,sum7,x1,x2,x3,x4,x5,x6,x7; 179 180 PetscFunctionBegin; 181 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 182 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 183 /* forward solve the lower triangular */ 184 idx = 0; 185 x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; 186 x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx]; 187 x[6] = b[6+idx]; 188 for ( i=1; i<n; i++ ) { 189 v = aa + 49*ai[i]; 190 vi = aj + ai[i]; 191 nz = diag[i] - ai[i]; 192 idx = 7*i; 193 sum1 = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx]; 194 sum4 = b[3+idx]; sum5 = b[4+idx]; sum6 = b[5+idx]; 195 sum7 = b[6+idx]; 196 while (nz--) { 197 jdx = 7*(*vi++); 198 x1 = x[jdx]; x2 = x[1+jdx]; x3 = x[2+jdx]; 199 x4 = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx]; 200 x7 = x[6+jdx]; 201 sum1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 202 sum2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 203 sum3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 204 sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 205 sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 206 sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 207 sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 208 v += 49; 209 } 210 x[idx] = sum1; 211 x[1+idx] = sum2; 212 x[2+idx] = sum3; 213 x[3+idx] = sum4; 214 x[4+idx] = sum5; 215 x[5+idx] = sum6; 216 x[6+idx] = sum7; 217 } 218 /* backward solve the upper triangular */ 219 for ( i=n-1; i>=0; i-- ){ 220 v = aa + 49*diag[i] + 49; 221 vi = aj + diag[i] + 1; 222 nz = ai[i+1] - diag[i] - 1; 223 idt = 7*i; 224 sum1 = x[idt]; sum2 = x[1+idt]; 225 sum3 = x[2+idt]; sum4 = x[3+idt]; 226 sum5 = x[4+idt]; sum6 = x[5+idt]; 227 sum7 = x[6+idt]; 228 while (nz--) { 229 idx = 7*(*vi++); 230 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 231 x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 232 x7 = x[6+idx]; 233 sum1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 234 sum2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 235 sum3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 236 sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 237 sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 238 sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 239 sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 240 v += 49; 241 } 242 v = aa + 49*diag[i]; 243 x[idt] = v[0]*sum1 + v[7]*sum2 + v[14]*sum3 + v[21]*sum4 244 + v[28]*sum5 + v[35]*sum6 + v[42]*sum7; 245 x[1+idt] = v[1]*sum1 + v[8]*sum2 + v[15]*sum3 + v[22]*sum4 246 + v[29]*sum5 + v[36]*sum6 + v[43]*sum7; 247 x[2+idt] = v[2]*sum1 + v[9]*sum2 + v[16]*sum3 + v[23]*sum4 248 + v[30]*sum5 + v[37]*sum6 + v[44]*sum7; 249 x[3+idt] = v[3]*sum1 + v[10]*sum2 + v[17]*sum3 + v[24]*sum4 250 + v[31]*sum5 + v[38]*sum6 + v[45]*sum7; 251 x[4+idt] = v[4]*sum1 + v[11]*sum2 + v[18]*sum3 + v[25]*sum4 252 + v[32]*sum5 + v[39]*sum6 + v[46]*sum7; 253 x[5+idt] = v[5]*sum1 + v[12]*sum2 + v[19]*sum3 + v[26]*sum4 254 + v[33]*sum5 + v[40]*sum6 + v[47]*sum7; 255 x[6+idt] = v[6]*sum1 + v[13]*sum2 + v[20]*sum3 + v[27]*sum4 256 + v[34]*sum5 + v[41]*sum6 + v[48]*sum7; 257 } 258 259 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 260 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 261 PLogFlops(2*36*(a->nz) - 6*a->n); 262 PetscFunctionReturn(0); 263 } 264 265 #undef __FUNC__ 266 #define __FUNC__ "MatSolve_SeqBAIJ_6" 267 int MatSolve_SeqBAIJ_6(Mat A,Vec bb,Vec xx) 268 { 269 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 270 IS iscol=a->col,isrow=a->row; 271 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 272 int *diag = a->diag; 273 MatScalar *aa=a->a,*v; 274 Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,x1,x2,x3,x4,x5,x6,*tmp; 275 276 PetscFunctionBegin; 277 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 278 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 279 tmp = a->solve_work; 280 281 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 282 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 283 284 /* forward solve the lower triangular */ 285 idx = 6*(*r++); 286 tmp[0] = b[idx]; tmp[1] = b[1+idx]; 287 tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; 288 tmp[4] = b[4+idx]; tmp[5] = b[5+idx]; 289 for ( i=1; i<n; i++ ) { 290 v = aa + 36*ai[i]; 291 vi = aj + ai[i]; 292 nz = diag[i] - ai[i]; 293 idx = 6*(*r++); 294 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 295 sum5 = b[4+idx]; sum6 = b[5+idx]; 296 while (nz--) { 297 idx = 6*(*vi++); 298 x1 = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx]; 299 x4 = tmp[3+idx]; x5 = tmp[4+idx]; x6 = tmp[5+idx]; 300 sum1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 301 sum2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 302 sum3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 303 sum4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 304 sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 305 sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 306 v += 36; 307 } 308 idx = 6*i; 309 tmp[idx] = sum1;tmp[1+idx] = sum2; 310 tmp[2+idx] = sum3;tmp[3+idx] = sum4; 311 tmp[4+idx] = sum5;tmp[5+idx] = sum6; 312 } 313 /* backward solve the upper triangular */ 314 for ( i=n-1; i>=0; i-- ){ 315 v = aa + 36*diag[i] + 36; 316 vi = aj + diag[i] + 1; 317 nz = ai[i+1] - diag[i] - 1; 318 idt = 6*i; 319 sum1 = tmp[idt]; sum2 = tmp[1+idt]; 320 sum3 = tmp[2+idt];sum4 = tmp[3+idt]; 321 sum5 = tmp[4+idt];sum6 = tmp[5+idt]; 322 while (nz--) { 323 idx = 6*(*vi++); 324 x1 = tmp[idx]; x2 = tmp[1+idx]; 325 x3 = tmp[2+idx]; x4 = tmp[3+idx]; 326 x5 = tmp[4+idx]; x6 = tmp[5+idx]; 327 sum1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 328 sum2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 329 sum3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 330 sum4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 331 sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 332 sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 333 v += 36; 334 } 335 idc = 6*(*c--); 336 v = aa + 36*diag[i]; 337 x[idc] = tmp[idt] = v[0]*sum1+v[6]*sum2+v[12]*sum3+ 338 v[18]*sum4+v[24]*sum5+v[30]*sum6; 339 x[1+idc] = tmp[1+idt] = v[1]*sum1+v[7]*sum2+v[13]*sum3+ 340 v[19]*sum4+v[25]*sum5+v[31]*sum6; 341 x[2+idc] = tmp[2+idt] = v[2]*sum1+v[8]*sum2+v[14]*sum3+ 342 v[20]*sum4+v[26]*sum5+v[32]*sum6; 343 x[3+idc] = tmp[3+idt] = v[3]*sum1+v[9]*sum2+v[15]*sum3+ 344 v[21]*sum4+v[27]*sum5+v[33]*sum6; 345 x[4+idc] = tmp[4+idt] = v[4]*sum1+v[10]*sum2+v[16]*sum3+ 346 v[22]*sum4+v[28]*sum5+v[34]*sum6; 347 x[5+idc] = tmp[5+idt] = v[5]*sum1+v[11]*sum2+v[17]*sum3+ 348 v[23]*sum4+v[29]*sum5+v[35]*sum6; 349 } 350 351 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 352 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 353 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 354 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 355 PLogFlops(2*36*(a->nz) - 6*a->n); 356 PetscFunctionReturn(0); 357 } 358 359 #undef __FUNC__ 360 #define __FUNC__ "MatSolve_SeqBAIJ_6_NaturalOrdering" 361 int MatSolve_SeqBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 362 { 363 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 364 int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 365 int ierr,*diag = a->diag,jdx; 366 MatScalar *aa=a->a,*v; 367 Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,sum6,x1,x2,x3,x4,x5,x6; 368 369 PetscFunctionBegin; 370 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 371 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 372 /* forward solve the lower triangular */ 373 idx = 0; 374 x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; 375 x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx]; 376 for ( i=1; i<n; i++ ) { 377 v = aa + 36*ai[i]; 378 vi = aj + ai[i]; 379 nz = diag[i] - ai[i]; 380 idx = 6*i; 381 sum1 = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx]; 382 sum4 = b[3+idx]; sum5 = b[4+idx]; sum6 = b[5+idx]; 383 while (nz--) { 384 jdx = 6*(*vi++); 385 x1 = x[jdx]; x2 = x[1+jdx]; x3 = x[2+jdx]; 386 x4 = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx]; 387 sum1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 388 sum2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 389 sum3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 390 sum4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 391 sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 392 sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 393 v += 36; 394 } 395 x[idx] = sum1; 396 x[1+idx] = sum2; 397 x[2+idx] = sum3; 398 x[3+idx] = sum4; 399 x[4+idx] = sum5; 400 x[5+idx] = sum6; 401 } 402 /* backward solve the upper triangular */ 403 for ( i=n-1; i>=0; i-- ){ 404 v = aa + 36*diag[i] + 36; 405 vi = aj + diag[i] + 1; 406 nz = ai[i+1] - diag[i] - 1; 407 idt = 6*i; 408 sum1 = x[idt]; sum2 = x[1+idt]; 409 sum3 = x[2+idt]; sum4 = x[3+idt]; 410 sum5 = x[4+idt]; sum6 = x[5+idt]; 411 while (nz--) { 412 idx = 6*(*vi++); 413 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 414 x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 415 sum1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 416 sum2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 417 sum3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 418 sum4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 419 sum5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 420 sum6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 421 v += 36; 422 } 423 v = aa + 36*diag[i]; 424 x[idt] = v[0]*sum1 + v[6]*sum2 + v[12]*sum3 + v[18]*sum4 + v[24]*sum5 + v[30]*sum6; 425 x[1+idt] = v[1]*sum1 + v[7]*sum2 + v[13]*sum3 + v[19]*sum4 + v[25]*sum5 + v[31]*sum6; 426 x[2+idt] = v[2]*sum1 + v[8]*sum2 + v[14]*sum3 + v[20]*sum4 + v[26]*sum5 + v[32]*sum6; 427 x[3+idt] = v[3]*sum1 + v[9]*sum2 + v[15]*sum3 + v[21]*sum4 + v[27]*sum5 + v[33]*sum6; 428 x[4+idt] = v[4]*sum1 + v[10]*sum2 + v[16]*sum3 + v[22]*sum4 + v[28]*sum5 + v[34]*sum6; 429 x[5+idt] = v[5]*sum1 + v[11]*sum2 + v[17]*sum3 + v[23]*sum4 + v[29]*sum5 + v[35]*sum6; 430 } 431 432 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 433 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 434 PLogFlops(2*36*(a->nz) - 6*a->n); 435 PetscFunctionReturn(0); 436 } 437 438 #undef __FUNC__ 439 #define __FUNC__ "MatSolve_SeqBAIJ_5" 440 int MatSolve_SeqBAIJ_5(Mat A,Vec bb,Vec xx) 441 { 442 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 443 IS iscol=a->col,isrow=a->row; 444 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 445 int *diag = a->diag; 446 MatScalar *aa=a->a,*v; 447 Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*tmp; 448 449 PetscFunctionBegin; 450 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 451 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 452 tmp = a->solve_work; 453 454 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 455 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 456 457 /* forward solve the lower triangular */ 458 idx = 5*(*r++); 459 tmp[0] = b[idx]; tmp[1] = b[1+idx]; 460 tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx]; 461 for ( i=1; i<n; i++ ) { 462 v = aa + 25*ai[i]; 463 vi = aj + ai[i]; 464 nz = diag[i] - ai[i]; 465 idx = 5*(*r++); 466 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 467 sum5 = b[4+idx]; 468 while (nz--) { 469 idx = 5*(*vi++); 470 x1 = tmp[idx]; x2 = tmp[1+idx];x3 = tmp[2+idx]; 471 x4 = tmp[3+idx];x5 = tmp[4+idx]; 472 sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 473 sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 474 sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 475 sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 476 sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 477 v += 25; 478 } 479 idx = 5*i; 480 tmp[idx] = sum1;tmp[1+idx] = sum2; 481 tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5; 482 } 483 /* backward solve the upper triangular */ 484 for ( i=n-1; i>=0; i-- ){ 485 v = aa + 25*diag[i] + 25; 486 vi = aj + diag[i] + 1; 487 nz = ai[i+1] - diag[i] - 1; 488 idt = 5*i; 489 sum1 = tmp[idt]; sum2 = tmp[1+idt]; 490 sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt]; 491 while (nz--) { 492 idx = 5*(*vi++); 493 x1 = tmp[idx]; x2 = tmp[1+idx]; 494 x3 = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx]; 495 sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 496 sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 497 sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 498 sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 499 sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 500 v += 25; 501 } 502 idc = 5*(*c--); 503 v = aa + 25*diag[i]; 504 x[idc] = tmp[idt] = v[0]*sum1+v[5]*sum2+v[10]*sum3+ 505 v[15]*sum4+v[20]*sum5; 506 x[1+idc] = tmp[1+idt] = v[1]*sum1+v[6]*sum2+v[11]*sum3+ 507 v[16]*sum4+v[21]*sum5; 508 x[2+idc] = tmp[2+idt] = v[2]*sum1+v[7]*sum2+v[12]*sum3+ 509 v[17]*sum4+v[22]*sum5; 510 x[3+idc] = tmp[3+idt] = v[3]*sum1+v[8]*sum2+v[13]*sum3+ 511 v[18]*sum4+v[23]*sum5; 512 x[4+idc] = tmp[4+idt] = v[4]*sum1+v[9]*sum2+v[14]*sum3+ 513 v[19]*sum4+v[24]*sum5; 514 } 515 516 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 517 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 518 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 519 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 520 PLogFlops(2*25*(a->nz) - 5*a->n); 521 PetscFunctionReturn(0); 522 } 523 524 #undef __FUNC__ 525 #define __FUNC__ "MatSolve_SeqBAIJ_5_NaturalOrdering" 526 int MatSolve_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 527 { 528 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 529 int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 530 int ierr,*diag = a->diag,jdx; 531 MatScalar *aa=a->a,*v; 532 Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;; 533 534 PetscFunctionBegin; 535 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 536 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 537 /* forward solve the lower triangular */ 538 idx = 0; 539 x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; x[3] = b[3+idx];x[4] = b[4+idx]; 540 for ( i=1; i<n; i++ ) { 541 v = aa + 25*ai[i]; 542 vi = aj + ai[i]; 543 nz = diag[i] - ai[i]; 544 idx = 5*i; 545 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];sum5 = b[4+idx]; 546 while (nz--) { 547 jdx = 5*(*vi++); 548 x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];x5 = x[4+jdx]; 549 sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 550 sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 551 sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 552 sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 553 sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 554 v += 25; 555 } 556 x[idx] = sum1; 557 x[1+idx] = sum2; 558 x[2+idx] = sum3; 559 x[3+idx] = sum4; 560 x[4+idx] = sum5; 561 } 562 /* backward solve the upper triangular */ 563 for ( i=n-1; i>=0; i-- ){ 564 v = aa + 25*diag[i] + 25; 565 vi = aj + diag[i] + 1; 566 nz = ai[i+1] - diag[i] - 1; 567 idt = 5*i; 568 sum1 = x[idt]; sum2 = x[1+idt]; 569 sum3 = x[2+idt];sum4 = x[3+idt]; sum5 = x[4+idt]; 570 while (nz--) { 571 idx = 5*(*vi++); 572 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 573 sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 574 sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 575 sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 576 sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 577 sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 578 v += 25; 579 } 580 v = aa + 25*diag[i]; 581 x[idt] = v[0]*sum1 + v[5]*sum2 + v[10]*sum3 + v[15]*sum4 + v[20]*sum5; 582 x[1+idt] = v[1]*sum1 + v[6]*sum2 + v[11]*sum3 + v[16]*sum4 + v[21]*sum5; 583 x[2+idt] = v[2]*sum1 + v[7]*sum2 + v[12]*sum3 + v[17]*sum4 + v[22]*sum5; 584 x[3+idt] = v[3]*sum1 + v[8]*sum2 + v[13]*sum3 + v[18]*sum4 + v[23]*sum5; 585 x[4+idt] = v[4]*sum1 + v[9]*sum2 + v[14]*sum3 + v[19]*sum4 + v[24]*sum5; 586 } 587 588 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 589 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 590 PLogFlops(2*25*(a->nz) - 5*a->n); 591 PetscFunctionReturn(0); 592 } 593 594 #undef __FUNC__ 595 #define __FUNC__ "MatSolve_SeqBAIJ_4" 596 int MatSolve_SeqBAIJ_4(Mat A,Vec bb,Vec xx) 597 { 598 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 599 IS iscol=a->col,isrow=a->row; 600 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 601 int *diag = a->diag; 602 MatScalar *aa=a->a,*v; 603 Scalar *x,*b,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*tmp; 604 605 PetscFunctionBegin; 606 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 607 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 608 tmp = a->solve_work; 609 610 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 611 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 612 613 /* forward solve the lower triangular */ 614 idx = 4*(*r++); 615 tmp[0] = b[idx]; tmp[1] = b[1+idx]; 616 tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; 617 for ( i=1; i<n; i++ ) { 618 v = aa + 16*ai[i]; 619 vi = aj + ai[i]; 620 nz = diag[i] - ai[i]; 621 idx = 4*(*r++); 622 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 623 while (nz--) { 624 idx = 4*(*vi++); 625 x1 = tmp[idx];x2 = tmp[1+idx];x3 = tmp[2+idx];x4 = tmp[3+idx]; 626 sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 627 sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 628 sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 629 sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 630 v += 16; 631 } 632 idx = 4*i; 633 tmp[idx] = sum1;tmp[1+idx] = sum2; 634 tmp[2+idx] = sum3;tmp[3+idx] = sum4; 635 } 636 /* backward solve the upper triangular */ 637 for ( i=n-1; i>=0; i-- ){ 638 v = aa + 16*diag[i] + 16; 639 vi = aj + diag[i] + 1; 640 nz = ai[i+1] - diag[i] - 1; 641 idt = 4*i; 642 sum1 = tmp[idt]; sum2 = tmp[1+idt]; 643 sum3 = tmp[2+idt];sum4 = tmp[3+idt]; 644 while (nz--) { 645 idx = 4*(*vi++); 646 x1 = tmp[idx]; x2 = tmp[1+idx]; 647 x3 = tmp[2+idx]; x4 = tmp[3+idx]; 648 sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 649 sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 650 sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 651 sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 652 v += 16; 653 } 654 idc = 4*(*c--); 655 v = aa + 16*diag[i]; 656 x[idc] = tmp[idt] = v[0]*sum1+v[4]*sum2+v[8]*sum3+v[12]*sum4; 657 x[1+idc] = tmp[1+idt] = v[1]*sum1+v[5]*sum2+v[9]*sum3+v[13]*sum4; 658 x[2+idc] = tmp[2+idt] = v[2]*sum1+v[6]*sum2+v[10]*sum3+v[14]*sum4; 659 x[3+idc] = tmp[3+idt] = v[3]*sum1+v[7]*sum2+v[11]*sum3+v[15]*sum4; 660 } 661 662 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 663 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 664 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 665 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 666 PLogFlops(2*16*(a->nz) - 4*a->n); 667 PetscFunctionReturn(0); 668 } 669 670 671 /* 672 Special case where the matrix was ILU(0) factored in the natural 673 ordering. This eliminates the need for the column and row permutation. 674 */ 675 #undef __FUNC__ 676 #define __FUNC__ "MatSolve_SeqBAIJ_4_NaturalOrdering" 677 int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 678 { 679 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 680 int n=a->mbs,*ai=a->i,*aj=a->j; 681 int ierr,*diag = a->diag; 682 MatScalar *aa=a->a; 683 Scalar *x,*b; 684 685 PetscFunctionBegin; 686 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 687 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 688 689 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS) 690 { 691 static Scalar w[2000]; /* very BAD need to fix */ 692 fortransolvebaij4blas_(&n,x,ai,aj,diag,aa,b,w); 693 } 694 #elif defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJ) 695 { 696 static Scalar w[2000]; /* very BAD need to fix */ 697 fortransolvebaij4_(&n,x,ai,aj,diag,aa,b,w); 698 } 699 #elif defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL) 700 fortransolvebaij4unroll_(&n,x,ai,aj,diag,aa,b); 701 #else 702 { 703 Scalar sum1,sum2,sum3,sum4,x1,x2,x3,x4; 704 MatScalar *v; 705 int jdx,idt,idx,nz,*vi,i; 706 707 /* forward solve the lower triangular */ 708 idx = 0; 709 x[0] = b[0]; x[1] = b[1]; x[2] = b[2]; x[3] = b[3]; 710 for ( i=1; i<n; i++ ) { 711 v = aa + 16*ai[i]; 712 vi = aj + ai[i]; 713 nz = diag[i] - ai[i]; 714 idx += 4; 715 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 716 while (nz--) { 717 jdx = 4*(*vi++); 718 x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx]; 719 sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 720 sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 721 sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 722 sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 723 v += 16; 724 } 725 x[idx] = sum1; 726 x[1+idx] = sum2; 727 x[2+idx] = sum3; 728 x[3+idx] = sum4; 729 } 730 /* backward solve the upper triangular */ 731 for ( i=n-1; i>=0; i-- ){ 732 v = aa + 16*diag[i] + 16; 733 vi = aj + diag[i] + 1; 734 nz = ai[i+1] - diag[i] - 1; 735 idt = 4*i; 736 sum1 = x[idt]; sum2 = x[1+idt]; 737 sum3 = x[2+idt];sum4 = x[3+idt]; 738 while (nz--) { 739 idx = 4*(*vi++); 740 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; x4 = x[3+idx]; 741 sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 742 sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 743 sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 744 sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 745 v += 16; 746 } 747 v = aa + 16*diag[i]; 748 x[idt] = v[0]*sum1 + v[4]*sum2 + v[8]*sum3 + v[12]*sum4; 749 x[1+idt] = v[1]*sum1 + v[5]*sum2 + v[9]*sum3 + v[13]*sum4; 750 x[2+idt] = v[2]*sum1 + v[6]*sum2 + v[10]*sum3 + v[14]*sum4; 751 x[3+idt] = v[3]*sum1 + v[7]*sum2 + v[11]*sum3 + v[15]*sum4; 752 } 753 } 754 #endif 755 756 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 757 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 758 PLogFlops(2*16*(a->nz) - 4*a->n); 759 PetscFunctionReturn(0); 760 } 761 762 #undef __FUNC__ 763 #define __FUNC__ "MatSolve_SeqBAIJ_3" 764 int MatSolve_SeqBAIJ_3(Mat A,Vec bb,Vec xx) 765 { 766 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 767 IS iscol=a->col,isrow=a->row; 768 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 769 int *diag = a->diag; 770 MatScalar *aa=a->a,*v; 771 Scalar *x,*b,sum1,sum2,sum3,x1,x2,x3,*tmp; 772 773 PetscFunctionBegin; 774 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 775 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 776 tmp = a->solve_work; 777 778 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 779 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 780 781 /* forward solve the lower triangular */ 782 idx = 3*(*r++); 783 tmp[0] = b[idx]; tmp[1] = b[1+idx]; tmp[2] = b[2+idx]; 784 for ( i=1; i<n; i++ ) { 785 v = aa + 9*ai[i]; 786 vi = aj + ai[i]; 787 nz = diag[i] - ai[i]; 788 idx = 3*(*r++); 789 sum1 = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx]; 790 while (nz--) { 791 idx = 3*(*vi++); 792 x1 = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx]; 793 sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 794 sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 795 sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 796 v += 9; 797 } 798 idx = 3*i; 799 tmp[idx] = sum1; tmp[1+idx] = sum2; tmp[2+idx] = sum3; 800 } 801 /* backward solve the upper triangular */ 802 for ( i=n-1; i>=0; i-- ){ 803 v = aa + 9*diag[i] + 9; 804 vi = aj + diag[i] + 1; 805 nz = ai[i+1] - diag[i] - 1; 806 idt = 3*i; 807 sum1 = tmp[idt]; sum2 = tmp[1+idt]; sum3 = tmp[2+idt]; 808 while (nz--) { 809 idx = 3*(*vi++); 810 x1 = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx]; 811 sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 812 sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 813 sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 814 v += 9; 815 } 816 idc = 3*(*c--); 817 v = aa + 9*diag[i]; 818 x[idc] = tmp[idt] = v[0]*sum1 + v[3]*sum2 + v[6]*sum3; 819 x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[4]*sum2 + v[7]*sum3; 820 x[2+idc] = tmp[2+idt] = v[2]*sum1 + v[5]*sum2 + v[8]*sum3; 821 } 822 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 823 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 824 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 825 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 826 PLogFlops(2*9*(a->nz) - 3*a->n); 827 PetscFunctionReturn(0); 828 } 829 830 /* 831 Special case where the matrix was ILU(0) factored in the natural 832 ordering. This eliminates the need for the column and row permutation. 833 */ 834 #undef __FUNC__ 835 #define __FUNC__ "MatSolve_SeqBAIJ_3_NaturalOrdering" 836 int MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 837 { 838 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 839 int n=a->mbs,*ai=a->i,*aj=a->j; 840 int ierr,*diag = a->diag; 841 MatScalar *aa=a->a, *v; 842 Scalar *x,*b,sum1,sum2,sum3,x1,x2,x3; 843 int jdx,idt,idx,nz,*vi,i; 844 845 PetscFunctionBegin; 846 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 847 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 848 849 850 /* forward solve the lower triangular */ 851 idx = 0; 852 x[0] = b[0]; x[1] = b[1]; x[2] = b[2]; 853 for ( i=1; i<n; i++ ) { 854 v = aa + 9*ai[i]; 855 vi = aj + ai[i]; 856 nz = diag[i] - ai[i]; 857 idx += 3; 858 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx]; 859 while (nz--) { 860 jdx = 3*(*vi++); 861 x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx]; 862 sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 863 sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 864 sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 865 v += 9; 866 } 867 x[idx] = sum1; 868 x[1+idx] = sum2; 869 x[2+idx] = sum3; 870 } 871 /* backward solve the upper triangular */ 872 for ( i=n-1; i>=0; i-- ){ 873 v = aa + 9*diag[i] + 9; 874 vi = aj + diag[i] + 1; 875 nz = ai[i+1] - diag[i] - 1; 876 idt = 3*i; 877 sum1 = x[idt]; sum2 = x[1+idt]; 878 sum3 = x[2+idt]; 879 while (nz--) { 880 idx = 3*(*vi++); 881 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 882 sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 883 sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 884 sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 885 v += 9; 886 } 887 v = aa + 9*diag[i]; 888 x[idt] = v[0]*sum1 + v[3]*sum2 + v[6]*sum3; 889 x[1+idt] = v[1]*sum1 + v[4]*sum2 + v[7]*sum3; 890 x[2+idt] = v[2]*sum1 + v[5]*sum2 + v[8]*sum3; 891 } 892 893 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 894 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 895 PLogFlops(2*9*(a->nz) - 3*a->n); 896 PetscFunctionReturn(0); 897 } 898 899 #undef __FUNC__ 900 #define __FUNC__ "MatSolve_SeqBAIJ_2" 901 int MatSolve_SeqBAIJ_2(Mat A,Vec bb,Vec xx) 902 { 903 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 904 IS iscol=a->col,isrow=a->row; 905 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 906 int *diag = a->diag; 907 MatScalar *aa=a->a,*v; 908 Scalar *x,*b,sum1,sum2,x1,x2,*tmp; 909 910 PetscFunctionBegin; 911 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 912 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 913 tmp = a->solve_work; 914 915 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 916 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 917 918 /* forward solve the lower triangular */ 919 idx = 2*(*r++); 920 tmp[0] = b[idx]; tmp[1] = b[1+idx]; 921 for ( i=1; i<n; i++ ) { 922 v = aa + 4*ai[i]; 923 vi = aj + ai[i]; 924 nz = diag[i] - ai[i]; 925 idx = 2*(*r++); 926 sum1 = b[idx]; sum2 = b[1+idx]; 927 while (nz--) { 928 idx = 2*(*vi++); 929 x1 = tmp[idx]; x2 = tmp[1+idx]; 930 sum1 -= v[0]*x1 + v[2]*x2; 931 sum2 -= v[1]*x1 + v[3]*x2; 932 v += 4; 933 } 934 idx = 2*i; 935 tmp[idx] = sum1; tmp[1+idx] = sum2; 936 } 937 /* backward solve the upper triangular */ 938 for ( i=n-1; i>=0; i-- ){ 939 v = aa + 4*diag[i] + 4; 940 vi = aj + diag[i] + 1; 941 nz = ai[i+1] - diag[i] - 1; 942 idt = 2*i; 943 sum1 = tmp[idt]; sum2 = tmp[1+idt]; 944 while (nz--) { 945 idx = 2*(*vi++); 946 x1 = tmp[idx]; x2 = tmp[1+idx]; 947 sum1 -= v[0]*x1 + v[2]*x2; 948 sum2 -= v[1]*x1 + v[3]*x2; 949 v += 4; 950 } 951 idc = 2*(*c--); 952 v = aa + 4*diag[i]; 953 x[idc] = tmp[idt] = v[0]*sum1 + v[2]*sum2; 954 x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[3]*sum2; 955 } 956 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 957 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 958 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 959 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 960 PLogFlops(2*4*(a->nz) - 2*a->n); 961 PetscFunctionReturn(0); 962 } 963 964 /* 965 Special case where the matrix was ILU(0) factored in the natural 966 ordering. This eliminates the need for the column and row permutation. 967 */ 968 #undef __FUNC__ 969 #define __FUNC__ "MatSolve_SeqBAIJ_2_NaturalOrdering" 970 int MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 971 { 972 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 973 int n=a->mbs,*ai=a->i,*aj=a->j; 974 int ierr,*diag = a->diag; 975 MatScalar *aa=a->a,*v; 976 Scalar *x,*b,sum1,sum2,x1,x2; 977 int jdx,idt,idx,nz,*vi,i; 978 979 PetscFunctionBegin; 980 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 981 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 982 983 /* forward solve the lower triangular */ 984 idx = 0; 985 x[0] = b[0]; x[1] = b[1]; 986 for ( i=1; i<n; i++ ) { 987 v = aa + 4*ai[i]; 988 vi = aj + ai[i]; 989 nz = diag[i] - ai[i]; 990 idx += 2; 991 sum1 = b[idx];sum2 = b[1+idx]; 992 while (nz--) { 993 jdx = 2*(*vi++); 994 x1 = x[jdx];x2 = x[1+jdx]; 995 sum1 -= v[0]*x1 + v[2]*x2; 996 sum2 -= v[1]*x1 + v[3]*x2; 997 v += 4; 998 } 999 x[idx] = sum1; 1000 x[1+idx] = sum2; 1001 } 1002 /* backward solve the upper triangular */ 1003 for ( i=n-1; i>=0; i-- ){ 1004 v = aa + 4*diag[i] + 4; 1005 vi = aj + diag[i] + 1; 1006 nz = ai[i+1] - diag[i] - 1; 1007 idt = 2*i; 1008 sum1 = x[idt]; sum2 = x[1+idt]; 1009 while (nz--) { 1010 idx = 2*(*vi++); 1011 x1 = x[idx]; x2 = x[1+idx]; 1012 sum1 -= v[0]*x1 + v[2]*x2; 1013 sum2 -= v[1]*x1 + v[3]*x2; 1014 v += 4; 1015 } 1016 v = aa + 4*diag[i]; 1017 x[idt] = v[0]*sum1 + v[2]*sum2; 1018 x[1+idt] = v[1]*sum1 + v[3]*sum2; 1019 } 1020 1021 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1022 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1023 PLogFlops(2*4*(a->nz) - 2*a->n); 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #undef __FUNC__ 1028 #define __FUNC__ "MatSolve_SeqBAIJ_1" 1029 int MatSolve_SeqBAIJ_1(Mat A,Vec bb,Vec xx) 1030 { 1031 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1032 IS iscol=a->col,isrow=a->row; 1033 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,*rout,*cout; 1034 int *diag = a->diag; 1035 MatScalar *aa=a->a,*v; 1036 Scalar *x,*b,sum1,*tmp; 1037 1038 PetscFunctionBegin; 1039 if (!n) PetscFunctionReturn(0); 1040 1041 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1042 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1043 tmp = a->solve_work; 1044 1045 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1046 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1047 1048 /* forward solve the lower triangular */ 1049 tmp[0] = b[*r++]; 1050 for ( i=1; i<n; i++ ) { 1051 v = aa + ai[i]; 1052 vi = aj + ai[i]; 1053 nz = diag[i] - ai[i]; 1054 sum1 = b[*r++]; 1055 while (nz--) { 1056 sum1 -= (*v++)*tmp[*vi++]; 1057 } 1058 tmp[i] = sum1; 1059 } 1060 /* backward solve the upper triangular */ 1061 for ( i=n-1; i>=0; i-- ){ 1062 v = aa + diag[i] + 1; 1063 vi = aj + diag[i] + 1; 1064 nz = ai[i+1] - diag[i] - 1; 1065 sum1 = tmp[i]; 1066 while (nz--) { 1067 sum1 -= (*v++)*tmp[*vi++]; 1068 } 1069 x[*c--] = tmp[i] = aa[diag[i]]*sum1; 1070 } 1071 1072 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1073 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1074 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1075 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1076 PLogFlops(2*1*(a->nz) - a->n); 1077 PetscFunctionReturn(0); 1078 } 1079 /* 1080 Special case where the matrix was ILU(0) factored in the natural 1081 ordering. This eliminates the need for the column and row permutation. 1082 */ 1083 #undef __FUNC__ 1084 #define __FUNC__ "MatSolve_SeqBAIJ_1_NaturalOrdering" 1085 int MatSolve_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 1086 { 1087 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1088 int n=a->mbs,*ai=a->i,*aj=a->j; 1089 int ierr,*diag = a->diag; 1090 MatScalar *aa=a->a; 1091 Scalar *x,*b; 1092 Scalar sum1,x1; 1093 MatScalar *v; 1094 int jdx,idt,idx,nz,*vi,i; 1095 1096 PetscFunctionBegin; 1097 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1098 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1099 1100 /* forward solve the lower triangular */ 1101 idx = 0; 1102 x[0] = b[0]; 1103 for ( i=1; i<n; i++ ) { 1104 v = aa + ai[i]; 1105 vi = aj + ai[i]; 1106 nz = diag[i] - ai[i]; 1107 idx += 1; 1108 sum1 = b[idx]; 1109 while (nz--) { 1110 jdx = *vi++; 1111 x1 = x[jdx]; 1112 sum1 -= v[0]*x1; 1113 v += 1; 1114 } 1115 x[idx] = sum1; 1116 } 1117 /* backward solve the upper triangular */ 1118 for ( i=n-1; i>=0; i-- ){ 1119 v = aa + diag[i] + 1; 1120 vi = aj + diag[i] + 1; 1121 nz = ai[i+1] - diag[i] - 1; 1122 idt = i; 1123 sum1 = x[idt]; 1124 while (nz--) { 1125 idx = *vi++; 1126 x1 = x[idx]; 1127 sum1 -= v[0]*x1; 1128 v += 1; 1129 } 1130 v = aa + diag[i]; 1131 x[idt] = v[0]*sum1; 1132 } 1133 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1134 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1135 PLogFlops(2*(a->nz) - a->n); 1136 PetscFunctionReturn(0); 1137 } 1138 1139 /* ----------------------------------------------------------------*/ 1140 /* 1141 This code is virtually identical to MatILUFactorSymbolic_SeqAIJ 1142 except that the data structure of Mat_SeqAIJ is slightly different. 1143 Not a good example of code reuse. 1144 */ 1145 extern int MatMissingDiag_SeqBAIJ(Mat); 1146 1147 #undef __FUNC__ 1148 #define __FUNC__ "MatILUFactorSymbolic_SeqBAIJ" 1149 int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 1150 { 1151 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b; 1152 IS isicol; 1153 int *r,*ic, ierr, prow, n = a->mbs, *ai = a->i, *aj = a->j; 1154 int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 1155 int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0, dcount = 0; 1156 int incrlev,nnz,i,bs = a->bs,bs2 = a->bs2, levels, diagonal_fill; 1157 PetscTruth col_identity, row_identity; 1158 double f; 1159 1160 PetscFunctionBegin; 1161 if (info) { 1162 f = info->fill; 1163 levels = (int) info->levels; 1164 diagonal_fill = (int) info->diagonal_fill; 1165 } else { 1166 f = 1.0; 1167 levels = 0; 1168 diagonal_fill = 0; 1169 } 1170 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 1171 1172 /* special case that simply copies fill pattern */ 1173 PetscValidHeaderSpecific(isrow,IS_COOKIE); 1174 PetscValidHeaderSpecific(iscol,IS_COOKIE); 1175 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1176 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1177 if (levels == 0 && row_identity && col_identity) { 1178 ierr = MatDuplicate_SeqBAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 1179 (*fact)->factor = FACTOR_LU; 1180 b = (Mat_SeqBAIJ *) (*fact)->data; 1181 if (!b->diag) { 1182 ierr = MatMarkDiag_SeqBAIJ(*fact);CHKERRQ(ierr); 1183 } 1184 ierr = MatMissingDiag_SeqBAIJ(*fact);CHKERRQ(ierr); 1185 b->row = isrow; 1186 b->col = iscol; 1187 b->icol = isicol; 1188 b->solve_work = (Scalar *) PetscMalloc((b->m+1+b->bs)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1189 /* 1190 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1191 for ILU(0) factorization with natural ordering 1192 */ 1193 switch (b->bs) { 1194 case 2: 1195 (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1196 (*fact)->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1197 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1198 break; 1199 case 3: 1200 (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1201 (*fact)->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1202 PLogInfo(A,"UMatILUFactorSymbolic_SeqBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 1203 break; 1204 case 4: 1205 (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1206 (*fact)->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1207 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1208 break; 1209 case 5: 1210 (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1211 (*fact)->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1212 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1213 break; 1214 case 6: 1215 (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1216 (*fact)->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1217 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1218 break; 1219 case 7: 1220 (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1221 (*fact)->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1222 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1223 break; 1224 } 1225 PetscFunctionReturn(0); 1226 } 1227 1228 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1229 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1230 1231 /* get new row pointers */ 1232 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 1233 ainew[0] = 0; 1234 /* don't know how many column pointers are needed so estimate */ 1235 jmax = (int) (f*ai[n] + 1); 1236 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 1237 /* ajfill is level of fill for each fill entry */ 1238 ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill); 1239 /* fill is a linked list of nonzeros in active row */ 1240 fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill); 1241 /* im is level for each filled value */ 1242 im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im); 1243 /* dloc is location of diagonal in factor */ 1244 dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc); 1245 dloc[0] = 0; 1246 for ( prow=0; prow<n; prow++ ) { 1247 1248 /* copy prow into linked list */ 1249 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 1250 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 1251 xi = aj + ai[r[prow]]; 1252 fill[n] = n; 1253 fill[prow] = -1; /* marker for diagonal entry */ 1254 while (nz--) { 1255 fm = n; 1256 idx = ic[*xi++]; 1257 do { 1258 m = fm; 1259 fm = fill[m]; 1260 } while (fm < idx); 1261 fill[m] = idx; 1262 fill[idx] = fm; 1263 im[idx] = 0; 1264 } 1265 1266 /* make sure diagonal entry is included */ 1267 if (diagonal_fill && fill[prow] == -1) { 1268 fm = n; 1269 while (fill[fm] < prow) fm = fill[fm]; 1270 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 1271 fill[fm] = prow; 1272 im[prow] = 0; 1273 nzf++; 1274 dcount++; 1275 } 1276 1277 nzi = 0; 1278 row = fill[n]; 1279 while ( row < prow ) { 1280 incrlev = im[row] + 1; 1281 nz = dloc[row]; 1282 xi = ajnew + ainew[row] + nz + 1; 1283 flev = ajfill + ainew[row] + nz + 1; 1284 nnz = ainew[row+1] - ainew[row] - nz - 1; 1285 fm = row; 1286 while (nnz-- > 0) { 1287 idx = *xi++; 1288 if (*flev + incrlev > levels) { 1289 flev++; 1290 continue; 1291 } 1292 do { 1293 m = fm; 1294 fm = fill[m]; 1295 } while (fm < idx); 1296 if (fm != idx) { 1297 im[idx] = *flev + incrlev; 1298 fill[m] = idx; 1299 fill[idx] = fm; 1300 fm = idx; 1301 nzf++; 1302 } else { 1303 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 1304 } 1305 flev++; 1306 } 1307 row = fill[row]; 1308 nzi++; 1309 } 1310 /* copy new filled row into permanent storage */ 1311 ainew[prow+1] = ainew[prow] + nzf; 1312 if (ainew[prow+1] > jmax) { 1313 1314 /* estimate how much additional space we will need */ 1315 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1316 /* just double the memory each time */ 1317 int maxadd = jmax; 1318 /* maxadd = (int) (((f*ai[n]+1)*(n-prow+5))/n); */ 1319 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1320 jmax += maxadd; 1321 1322 /* allocate a longer ajnew and ajfill */ 1323 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1324 ierr = PetscMemcpy(xi,ajnew,ainew[prow]*sizeof(int));CHKERRQ(ierr); 1325 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1326 ajnew = xi; 1327 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1328 ierr = PetscMemcpy(xi,ajfill,ainew[prow]*sizeof(int));CHKERRQ(ierr); 1329 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1330 ajfill = xi; 1331 realloc++; /* count how many reallocations are needed */ 1332 } 1333 xi = ajnew + ainew[prow]; 1334 flev = ajfill + ainew[prow]; 1335 dloc[prow] = nzi; 1336 fm = fill[n]; 1337 while (nzf--) { 1338 *xi++ = fm; 1339 *flev++ = im[fm]; 1340 fm = fill[fm]; 1341 } 1342 /* make sure row has diagonal entry */ 1343 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1344 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 1345 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1346 } 1347 } 1348 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1349 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1350 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1351 ierr = PetscFree(fill);CHKERRQ(ierr); 1352 ierr = PetscFree(im);CHKERRQ(ierr); 1353 1354 { 1355 double af = ((double)ainew[n])/((double)ai[n]); 1356 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n", 1357 realloc,f,af); 1358 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Run with -pc_ilu_fill %g or use \n",af); 1359 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:PCILUSetFill(pc,%g);\n",af); 1360 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:for best performance.\n"); 1361 if (diagonal_fill) { 1362 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Detected and replace %d missing diagonals",dcount); 1363 } 1364 } 1365 1366 /* put together the new matrix */ 1367 ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1368 PLogObjectParent(*fact,isicol); 1369 b = (Mat_SeqBAIJ *) (*fact)->data; 1370 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1371 b->singlemalloc = 0; 1372 len = bs2*ainew[n]*sizeof(MatScalar); 1373 /* the next line frees the default space generated by the Create() */ 1374 ierr = PetscFree(b->a);CHKERRQ(ierr); 1375 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1376 b->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a); 1377 b->j = ajnew; 1378 b->i = ainew; 1379 for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 1380 b->diag = dloc; 1381 b->ilen = 0; 1382 b->imax = 0; 1383 b->row = isrow; 1384 b->col = iscol; 1385 b->icol = isicol; 1386 b->solve_work = (Scalar *) PetscMalloc( (bs*n+bs)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1387 /* In b structure: Free imax, ilen, old a, old j. 1388 Allocate dloc, solve_work, new a, new j */ 1389 PLogObjectMemory(*fact,(ainew[n]-n)*(sizeof(int))+bs2*ainew[n]*sizeof(Scalar)); 1390 b->maxnz = b->nz = ainew[n]; 1391 (*fact)->factor = FACTOR_LU; 1392 1393 (*fact)->info.factor_mallocs = realloc; 1394 (*fact)->info.fill_ratio_given = f; 1395 (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]); 1396 1397 PetscFunctionReturn(0); 1398 } 1399 1400 1401 1402 1403