1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: baijfact2.c,v 1.20 1998/12/21 01:00:55 bsmith Exp balay $"; 3 #endif 4 /* 5 Factorization code for BAIJ format. 6 */ 7 8 9 #include "src/mat/impls/baij/seq/baij.h" 10 #include "src/vec/vecimpl.h" 11 #include "src/inline/ilu.h" 12 #include "src/inline/dot.h" 13 14 /* ----------------------------------------------------------- */ 15 #undef __FUNC__ 16 #define __FUNC__ "MatSolve_SeqBAIJ_N" 17 int MatSolve_SeqBAIJ_N(Mat A,Vec bb,Vec xx) 18 { 19 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 20 IS iscol=a->col,isrow=a->row; 21 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j; 22 int nz,bs=a->bs,bs2=a->bs2,*rout,*cout; 23 MatScalar *aa=a->a,*v; 24 Scalar *x,*b,*sum,*tmp,*lsum; 25 26 PetscFunctionBegin; 27 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 28 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 29 tmp = a->solve_work; 30 31 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 32 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 33 34 /* forward solve the lower triangular */ 35 PetscMemcpy(tmp,b + bs*(*r++), bs*sizeof(Scalar)); 36 for ( i=1; i<n; i++ ) { 37 v = aa + bs2*ai[i]; 38 vi = aj + ai[i]; 39 nz = a->diag[i] - ai[i]; 40 sum = tmp + bs*i; 41 PetscMemcpy(sum,b+bs*(*r++),bs*sizeof(Scalar)); 42 while (nz--) { 43 Kernel_v_gets_v_minus_A_times_w(bs,sum,v,tmp+bs*(*vi++)); 44 v += bs2; 45 } 46 } 47 /* backward solve the upper triangular */ 48 lsum = a->solve_work + a->n; 49 for ( i=n-1; i>=0; i-- ){ 50 v = aa + bs2*(a->diag[i] + 1); 51 vi = aj + a->diag[i] + 1; 52 nz = ai[i+1] - a->diag[i] - 1; 53 PetscMemcpy(lsum,tmp+i*bs,bs*sizeof(Scalar)); 54 while (nz--) { 55 Kernel_v_gets_v_minus_A_times_w(bs,lsum,v,tmp+bs*(*vi++)); 56 v += bs2; 57 } 58 Kernel_w_gets_A_times_v(bs,lsum,aa+bs2*a->diag[i],tmp+i*bs); 59 PetscMemcpy(x + bs*(*c--),tmp+i*bs,bs*sizeof(Scalar)); 60 } 61 62 ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr); 63 ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr); 64 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 65 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 66 PLogFlops(2*(a->bs2)*(a->nz) - a->bs*a->n); 67 PetscFunctionReturn(0); 68 } 69 70 #undef __FUNC__ 71 #define __FUNC__ "MatSolve_SeqBAIJ_7" 72 int MatSolve_SeqBAIJ_7(Mat A,Vec bb,Vec xx) 73 { 74 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 75 IS iscol=a->col,isrow=a->row; 76 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 77 int *diag = a->diag; 78 MatScalar *aa=a->a,*v; 79 Scalar sum1,sum2,sum3,sum4,sum5,sum6,sum7,x1,x2,x3,x4,x5,x6,x7; 80 Scalar *x,*b,*tmp; 81 82 PetscFunctionBegin; 83 ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 84 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 85 tmp = a->solve_work; 86 87 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 88 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 89 90 /* forward solve the lower triangular */ 91 idx = 7*(*r++); 92 tmp[0] = b[idx]; tmp[1] = b[1+idx]; 93 tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx]; 94 tmp[5] = b[5+idx]; tmp[6] = b[6+idx]; 95 96 for ( i=1; i<n; i++ ) { 97 v = aa + 49*ai[i]; 98 vi = aj + ai[i]; 99 nz = diag[i] - ai[i]; 100 idx = 7*(*r++); 101 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 102 sum5 = b[4+idx];sum6 = b[5+idx];sum7 = b[6+idx]; 103 while (nz--) { 104 idx = 7*(*vi++); 105 x1 = tmp[idx]; x2 = tmp[1+idx];x3 = tmp[2+idx]; 106 x4 = tmp[3+idx];x5 = tmp[4+idx]; 107 x6 = tmp[5+idx];x7 = tmp[6+idx]; 108 sum1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 109 sum2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 110 sum3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 111 sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 112 sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 113 sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 114 sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 115 v += 49; 116 } 117 idx = 7*i; 118 tmp[idx] = sum1;tmp[1+idx] = sum2; 119 tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5; 120 tmp[5+idx] = sum6;tmp[6+idx] = sum7; 121 } 122 /* backward solve the upper triangular */ 123 for ( i=n-1; i>=0; i-- ){ 124 v = aa + 49*diag[i] + 49; 125 vi = aj + diag[i] + 1; 126 nz = ai[i+1] - diag[i] - 1; 127 idt = 7*i; 128 sum1 = tmp[idt]; sum2 = tmp[1+idt]; 129 sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt]; 130 sum6 = tmp[5+idt];sum7 = tmp[6+idt]; 131 while (nz--) { 132 idx = 7*(*vi++); 133 x1 = tmp[idx]; x2 = tmp[1+idx]; 134 x3 = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx]; 135 x6 = tmp[5+idx]; x7 = tmp[6+idx]; 136 sum1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 137 sum2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 138 sum3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 139 sum4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 140 sum5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 141 sum6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 142 sum7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 143 v += 49; 144 } 145 idc = 7*(*c--); 146 v = aa + 49*diag[i]; 147 x[idc] = tmp[idt] = v[0]*sum1+v[7]*sum2+v[14]*sum3+ 148 v[21]*sum4+v[28]*sum5+v[35]*sum6+v[42]*sum7; 149 x[1+idc] = tmp[1+idt] = v[1]*sum1+v[8]*sum2+v[15]*sum3+ 150 v[22]*sum4+v[29]*sum5+v[36]*sum6+v[43]*sum7; 151 x[2+idc] = tmp[2+idt] = v[2]*sum1+v[9]*sum2+v[16]*sum3+ 152 v[23]*sum4+v[30]*sum5+v[37]*sum6+v[44]*sum7; 153 x[3+idc] = tmp[3+idt] = v[3]*sum1+v[10]*sum2+v[17]*sum3+ 154 v[24]*sum4+v[31]*sum5+v[38]*sum6+v[45]*sum7; 155 x[4+idc] = tmp[4+idt] = v[4]*sum1+v[11]*sum2+v[18]*sum3+ 156 v[25]*sum4+v[32]*sum5+v[39]*sum6+v[46]*sum7; 157 x[5+idc] = tmp[5+idt] = v[5]*sum1+v[12]*sum2+v[19]*sum3+ 158 v[26]*sum4+v[33]*sum5+v[40]*sum6+v[47]*sum7; 159 x[6+idc] = tmp[6+idt] = v[6]*sum1+v[13]*sum2+v[20]*sum3+ 160 v[27]*sum4+v[34]*sum5+v[41]*sum6+v[48]*sum7; 161 } 162 163 ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr); 164 ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr); 165 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 166 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 167 PLogFlops(2*49*(a->nz) - 7*a->n); 168 PetscFunctionReturn(0); 169 } 170 171 #undef __FUNC__ 172 #define __FUNC__ "MatSolve_SeqBAIJ_5" 173 int MatSolve_SeqBAIJ_5(Mat A,Vec bb,Vec xx) 174 { 175 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 176 IS iscol=a->col,isrow=a->row; 177 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 178 int *diag = a->diag; 179 MatScalar *aa=a->a,*v; 180 Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*tmp; 181 182 PetscFunctionBegin; 183 ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 184 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 185 tmp = a->solve_work; 186 187 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 188 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 189 190 /* forward solve the lower triangular */ 191 idx = 5*(*r++); 192 tmp[0] = b[idx]; tmp[1] = b[1+idx]; 193 tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx]; 194 for ( i=1; i<n; i++ ) { 195 v = aa + 25*ai[i]; 196 vi = aj + ai[i]; 197 nz = diag[i] - ai[i]; 198 idx = 5*(*r++); 199 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 200 sum5 = b[4+idx]; 201 while (nz--) { 202 idx = 5*(*vi++); 203 x1 = tmp[idx]; x2 = tmp[1+idx];x3 = tmp[2+idx]; 204 x4 = tmp[3+idx];x5 = tmp[4+idx]; 205 sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 206 sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 207 sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 208 sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 209 sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 210 v += 25; 211 } 212 idx = 5*i; 213 tmp[idx] = sum1;tmp[1+idx] = sum2; 214 tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5; 215 } 216 /* backward solve the upper triangular */ 217 for ( i=n-1; i>=0; i-- ){ 218 v = aa + 25*diag[i] + 25; 219 vi = aj + diag[i] + 1; 220 nz = ai[i+1] - diag[i] - 1; 221 idt = 5*i; 222 sum1 = tmp[idt]; sum2 = tmp[1+idt]; 223 sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt]; 224 while (nz--) { 225 idx = 5*(*vi++); 226 x1 = tmp[idx]; x2 = tmp[1+idx]; 227 x3 = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx]; 228 sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 229 sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 230 sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 231 sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 232 sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 233 v += 25; 234 } 235 idc = 5*(*c--); 236 v = aa + 25*diag[i]; 237 x[idc] = tmp[idt] = v[0]*sum1+v[5]*sum2+v[10]*sum3+ 238 v[15]*sum4+v[20]*sum5; 239 x[1+idc] = tmp[1+idt] = v[1]*sum1+v[6]*sum2+v[11]*sum3+ 240 v[16]*sum4+v[21]*sum5; 241 x[2+idc] = tmp[2+idt] = v[2]*sum1+v[7]*sum2+v[12]*sum3+ 242 v[17]*sum4+v[22]*sum5; 243 x[3+idc] = tmp[3+idt] = v[3]*sum1+v[8]*sum2+v[13]*sum3+ 244 v[18]*sum4+v[23]*sum5; 245 x[4+idc] = tmp[4+idt] = v[4]*sum1+v[9]*sum2+v[14]*sum3+ 246 v[19]*sum4+v[24]*sum5; 247 } 248 249 ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr); 250 ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr); 251 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 252 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 253 PLogFlops(2*25*(a->nz) - 5*a->n); 254 PetscFunctionReturn(0); 255 } 256 257 #undef __FUNC__ 258 #define __FUNC__ "MatSolve_SeqBAIJ_4" 259 int MatSolve_SeqBAIJ_4(Mat A,Vec bb,Vec xx) 260 { 261 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 262 IS iscol=a->col,isrow=a->row; 263 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 264 int *diag = a->diag; 265 MatScalar *aa=a->a,*v; 266 Scalar *x,*b,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*tmp; 267 268 PetscFunctionBegin; 269 ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 270 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 271 tmp = a->solve_work; 272 273 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 274 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 275 276 /* forward solve the lower triangular */ 277 idx = 4*(*r++); 278 tmp[0] = b[idx]; tmp[1] = b[1+idx]; 279 tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; 280 for ( i=1; i<n; i++ ) { 281 v = aa + 16*ai[i]; 282 vi = aj + ai[i]; 283 nz = diag[i] - ai[i]; 284 idx = 4*(*r++); 285 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 286 while (nz--) { 287 idx = 4*(*vi++); 288 x1 = tmp[idx];x2 = tmp[1+idx];x3 = tmp[2+idx];x4 = tmp[3+idx]; 289 sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 290 sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 291 sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 292 sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 293 v += 16; 294 } 295 idx = 4*i; 296 tmp[idx] = sum1;tmp[1+idx] = sum2; 297 tmp[2+idx] = sum3;tmp[3+idx] = sum4; 298 } 299 /* backward solve the upper triangular */ 300 for ( i=n-1; i>=0; i-- ){ 301 v = aa + 16*diag[i] + 16; 302 vi = aj + diag[i] + 1; 303 nz = ai[i+1] - diag[i] - 1; 304 idt = 4*i; 305 sum1 = tmp[idt]; sum2 = tmp[1+idt]; 306 sum3 = tmp[2+idt];sum4 = tmp[3+idt]; 307 while (nz--) { 308 idx = 4*(*vi++); 309 x1 = tmp[idx]; x2 = tmp[1+idx]; 310 x3 = tmp[2+idx]; x4 = tmp[3+idx]; 311 sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 312 sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 313 sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 314 sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 315 v += 16; 316 } 317 idc = 4*(*c--); 318 v = aa + 16*diag[i]; 319 x[idc] = tmp[idt] = v[0]*sum1+v[4]*sum2+v[8]*sum3+v[12]*sum4; 320 x[1+idc] = tmp[1+idt] = v[1]*sum1+v[5]*sum2+v[9]*sum3+v[13]*sum4; 321 x[2+idc] = tmp[2+idt] = v[2]*sum1+v[6]*sum2+v[10]*sum3+v[14]*sum4; 322 x[3+idc] = tmp[3+idt] = v[3]*sum1+v[7]*sum2+v[11]*sum3+v[15]*sum4; 323 } 324 325 ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr); 326 ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr); 327 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 328 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 329 PLogFlops(2*16*(a->nz) - 4*a->n); 330 PetscFunctionReturn(0); 331 } 332 333 334 /* 335 Special case where the matrix was ILU(0) factored in the natural 336 ordering. This eliminates the need for the column and row permutation. 337 */ 338 #undef __FUNC__ 339 #define __FUNC__ "MatSolve_SeqBAIJ_4_NaturalOrdering" 340 int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 341 { 342 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 343 int n=a->mbs,*ai=a->i,*aj=a->j; 344 int ierr,*diag = a->diag; 345 MatScalar *aa=a->a; 346 Scalar *x,*b; 347 348 PetscFunctionBegin; 349 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 350 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 351 352 #if defined(USE_FORTRAN_KERNEL_SOLVEBAIJBLAS) 353 { 354 static Scalar w[2000]; /* very BAD need to fix */ 355 fortransolvebaij4blas_(&n,x,ai,aj,diag,aa,b,w); 356 } 357 #elif defined(USE_FORTRAN_KERNEL_SOLVEBAIJ) 358 { 359 static Scalar w[2000]; /* very BAD need to fix */ 360 fortransolvebaij4_(&n,x,ai,aj,diag,aa,b,w); 361 } 362 #elif defined(USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL) 363 fortransolvebaij4unroll_(&n,x,ai,aj,diag,aa,b); 364 #else 365 { 366 Scalar sum1,sum2,sum3,sum4,x1,x2,x3,x4; 367 MatScalar *v; 368 int jdx,idt,idx,nz,*vi,i; 369 370 /* forward solve the lower triangular */ 371 idx = 0; 372 x[0] = b[0]; x[1] = b[1]; x[2] = b[2]; x[3] = b[3]; 373 for ( i=1; i<n; i++ ) { 374 v = aa + 16*ai[i]; 375 vi = aj + ai[i]; 376 nz = diag[i] - ai[i]; 377 idx += 4; 378 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 379 while (nz--) { 380 jdx = 4*(*vi++); 381 x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx]; 382 sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 383 sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 384 sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 385 sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 386 v += 16; 387 } 388 x[idx] = sum1; 389 x[1+idx] = sum2; 390 x[2+idx] = sum3; 391 x[3+idx] = sum4; 392 } 393 /* backward solve the upper triangular */ 394 for ( i=n-1; i>=0; i-- ){ 395 v = aa + 16*diag[i] + 16; 396 vi = aj + diag[i] + 1; 397 nz = ai[i+1] - diag[i] - 1; 398 idt = 4*i; 399 sum1 = x[idt]; sum2 = x[1+idt]; 400 sum3 = x[2+idt];sum4 = x[3+idt]; 401 while (nz--) { 402 idx = 4*(*vi++); 403 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; x4 = x[3+idx]; 404 sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 405 sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 406 sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 407 sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 408 v += 16; 409 } 410 v = aa + 16*diag[i]; 411 x[idt] = v[0]*sum1 + v[4]*sum2 + v[8]*sum3 + v[12]*sum4; 412 x[1+idt] = v[1]*sum1 + v[5]*sum2 + v[9]*sum3 + v[13]*sum4; 413 x[2+idt] = v[2]*sum1 + v[6]*sum2 + v[10]*sum3 + v[14]*sum4; 414 x[3+idt] = v[3]*sum1 + v[7]*sum2 + v[11]*sum3 + v[15]*sum4; 415 } 416 } 417 #endif 418 419 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 420 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 421 PLogFlops(2*16*(a->nz) - 4*a->n); 422 PetscFunctionReturn(0); 423 } 424 425 #undef __FUNC__ 426 #define __FUNC__ "MatSolve_SeqBAIJ_5_NaturalOrdering" 427 int MatSolve_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 428 { 429 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 430 int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 431 int ierr,*diag = a->diag,jdx; 432 MatScalar *aa=a->a,*v; 433 Scalar *x,*b,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;; 434 435 PetscFunctionBegin; 436 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 437 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 438 /* forward solve the lower triangular */ 439 idx = 0; 440 x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; x[3] = b[3+idx];x[4] = b[4+idx]; 441 for ( i=1; i<n; i++ ) { 442 v = aa + 25*ai[i]; 443 vi = aj + ai[i]; 444 nz = diag[i] - ai[i]; 445 idx = 5*i; 446 sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx];sum5 = b[4+idx]; 447 while (nz--) { 448 jdx = 5*(*vi++); 449 x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];x5 = x[4+jdx]; 450 sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 451 sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 452 sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 453 sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 454 sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 455 v += 25; 456 } 457 x[idx] = sum1; 458 x[1+idx] = sum2; 459 x[2+idx] = sum3; 460 x[3+idx] = sum4; 461 x[4+idx] = sum5; 462 } 463 /* backward solve the upper triangular */ 464 for ( i=n-1; i>=0; i-- ){ 465 v = aa + 25*diag[i] + 25; 466 vi = aj + diag[i] + 1; 467 nz = ai[i+1] - diag[i] - 1; 468 idt = 5*i; 469 sum1 = x[idt]; sum2 = x[1+idt]; 470 sum3 = x[2+idt];sum4 = x[3+idt]; sum5 = x[4+idt]; 471 while (nz--) { 472 idx = 5*(*vi++); 473 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 474 sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 475 sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 476 sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 477 sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 478 sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 479 v += 25; 480 } 481 v = aa + 25*diag[i]; 482 x[idt] = v[0]*sum1 + v[5]*sum2 + v[10]*sum3 + v[15]*sum4 + v[20]*sum5; 483 x[1+idt] = v[1]*sum1 + v[6]*sum2 + v[11]*sum3 + v[16]*sum4 + v[21]*sum5; 484 x[2+idt] = v[2]*sum1 + v[7]*sum2 + v[12]*sum3 + v[17]*sum4 + v[22]*sum5; 485 x[3+idt] = v[3]*sum1 + v[8]*sum2 + v[13]*sum3 + v[18]*sum4 + v[23]*sum5; 486 x[4+idt] = v[4]*sum1 + v[9]*sum2 + v[14]*sum3 + v[19]*sum4 + v[24]*sum5; 487 } 488 489 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 490 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 491 PLogFlops(2*25*(a->nz) - 5*a->n); 492 PetscFunctionReturn(0); 493 } 494 495 496 #undef __FUNC__ 497 #define __FUNC__ "MatSolve_SeqBAIJ_3" 498 int MatSolve_SeqBAIJ_3(Mat A,Vec bb,Vec xx) 499 { 500 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 501 IS iscol=a->col,isrow=a->row; 502 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 503 int *diag = a->diag; 504 MatScalar *aa=a->a,*v; 505 Scalar *x,*b,sum1,sum2,sum3,x1,x2,x3,*tmp; 506 507 PetscFunctionBegin; 508 ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 509 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 510 tmp = a->solve_work; 511 512 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 513 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 514 515 /* forward solve the lower triangular */ 516 idx = 3*(*r++); 517 tmp[0] = b[idx]; tmp[1] = b[1+idx]; tmp[2] = b[2+idx]; 518 for ( i=1; i<n; i++ ) { 519 v = aa + 9*ai[i]; 520 vi = aj + ai[i]; 521 nz = diag[i] - ai[i]; 522 idx = 3*(*r++); 523 sum1 = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx]; 524 while (nz--) { 525 idx = 3*(*vi++); 526 x1 = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx]; 527 sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 528 sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 529 sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 530 v += 9; 531 } 532 idx = 3*i; 533 tmp[idx] = sum1; tmp[1+idx] = sum2; tmp[2+idx] = sum3; 534 } 535 /* backward solve the upper triangular */ 536 for ( i=n-1; i>=0; i-- ){ 537 v = aa + 9*diag[i] + 9; 538 vi = aj + diag[i] + 1; 539 nz = ai[i+1] - diag[i] - 1; 540 idt = 3*i; 541 sum1 = tmp[idt]; sum2 = tmp[1+idt]; sum3 = tmp[2+idt]; 542 while (nz--) { 543 idx = 3*(*vi++); 544 x1 = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx]; 545 sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 546 sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 547 sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 548 v += 9; 549 } 550 idc = 3*(*c--); 551 v = aa + 9*diag[i]; 552 x[idc] = tmp[idt] = v[0]*sum1 + v[3]*sum2 + v[6]*sum3; 553 x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[4]*sum2 + v[7]*sum3; 554 x[2+idc] = tmp[2+idt] = v[2]*sum1 + v[5]*sum2 + v[8]*sum3; 555 } 556 ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr); 557 ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr); 558 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 559 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 560 PLogFlops(2*9*(a->nz) - 3*a->n); 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNC__ 565 #define __FUNC__ "MatSolve_SeqBAIJ_2" 566 int MatSolve_SeqBAIJ_2(Mat A,Vec bb,Vec xx) 567 { 568 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 569 IS iscol=a->col,isrow=a->row; 570 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 571 int *diag = a->diag; 572 MatScalar *aa=a->a,*v; 573 Scalar *x,*b,sum1,sum2,x1,x2,*tmp; 574 575 PetscFunctionBegin; 576 ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 577 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 578 tmp = a->solve_work; 579 580 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 581 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 582 583 /* forward solve the lower triangular */ 584 idx = 2*(*r++); 585 tmp[0] = b[idx]; tmp[1] = b[1+idx]; 586 for ( i=1; i<n; i++ ) { 587 v = aa + 4*ai[i]; 588 vi = aj + ai[i]; 589 nz = diag[i] - ai[i]; 590 idx = 2*(*r++); 591 sum1 = b[idx]; sum2 = b[1+idx]; 592 while (nz--) { 593 idx = 2*(*vi++); 594 x1 = tmp[idx]; x2 = tmp[1+idx]; 595 sum1 -= v[0]*x1 + v[2]*x2; 596 sum2 -= v[1]*x1 + v[3]*x2; 597 v += 4; 598 } 599 idx = 2*i; 600 tmp[idx] = sum1; tmp[1+idx] = sum2; 601 } 602 /* backward solve the upper triangular */ 603 for ( i=n-1; i>=0; i-- ){ 604 v = aa + 4*diag[i] + 4; 605 vi = aj + diag[i] + 1; 606 nz = ai[i+1] - diag[i] - 1; 607 idt = 2*i; 608 sum1 = tmp[idt]; sum2 = tmp[1+idt]; 609 while (nz--) { 610 idx = 2*(*vi++); 611 x1 = tmp[idx]; x2 = tmp[1+idx]; 612 sum1 -= v[0]*x1 + v[2]*x2; 613 sum2 -= v[1]*x1 + v[3]*x2; 614 v += 4; 615 } 616 idc = 2*(*c--); 617 v = aa + 4*diag[i]; 618 x[idc] = tmp[idt] = v[0]*sum1 + v[2]*sum2; 619 x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[3]*sum2; 620 } 621 ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr); 622 ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr); 623 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 624 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 625 PLogFlops(2*4*(a->nz) - 2*a->n); 626 PetscFunctionReturn(0); 627 } 628 629 #undef __FUNC__ 630 #define __FUNC__ "MatSolve_SeqBAIJ_1" 631 int MatSolve_SeqBAIJ_1(Mat A,Vec bb,Vec xx) 632 { 633 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 634 IS iscol=a->col,isrow=a->row; 635 int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,*rout,*cout; 636 int *diag = a->diag; 637 MatScalar *aa=a->a,*v; 638 Scalar *x,*b,sum1,*tmp; 639 640 PetscFunctionBegin; 641 if (!n) PetscFunctionReturn(0); 642 643 ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 644 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 645 tmp = a->solve_work; 646 647 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 648 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 649 650 /* forward solve the lower triangular */ 651 tmp[0] = b[*r++]; 652 for ( i=1; i<n; i++ ) { 653 v = aa + ai[i]; 654 vi = aj + ai[i]; 655 nz = diag[i] - ai[i]; 656 sum1 = b[*r++]; 657 while (nz--) { 658 sum1 -= (*v++)*tmp[*vi++]; 659 } 660 tmp[i] = sum1; 661 } 662 /* backward solve the upper triangular */ 663 for ( i=n-1; i>=0; i-- ){ 664 v = aa + diag[i] + 1; 665 vi = aj + diag[i] + 1; 666 nz = ai[i+1] - diag[i] - 1; 667 sum1 = tmp[i]; 668 while (nz--) { 669 sum1 -= (*v++)*tmp[*vi++]; 670 } 671 x[*c--] = tmp[i] = aa[diag[i]]*sum1; 672 } 673 674 ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr); 675 ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr); 676 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 677 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 678 PLogFlops(2*1*(a->nz) - a->n); 679 PetscFunctionReturn(0); 680 } 681 682 /* ----------------------------------------------------------------*/ 683 /* 684 This code is virtually identical to MatILUFactorSymbolic_SeqAIJ 685 except that the data structure of Mat_SeqAIJ is slightly different. 686 Not a good example of code reuse. 687 */ 688 #undef __FUNC__ 689 #define __FUNC__ "MatILUFactorSymbolic_SeqBAIJ" 690 int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,double f,int levels,Mat *fact) 691 { 692 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b; 693 IS isicol; 694 int *r,*ic, ierr, prow, n = a->mbs, *ai = a->i, *aj = a->j; 695 int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 696 int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0; 697 int incrlev,nnz,i,bs = a->bs,bs2 = a->bs2; 698 PetscTruth col_identity, row_identity; 699 700 PetscFunctionBegin; 701 ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 702 703 /* special case that simply copies fill pattern */ 704 PetscValidHeaderSpecific(isrow,IS_COOKIE); 705 PetscValidHeaderSpecific(iscol,IS_COOKIE); 706 ierr = ISIdentity(isrow,&row_identity); CHKERRQ(ierr); 707 ierr = ISIdentity(iscol,&col_identity); CHKERRQ(ierr); 708 if (levels == 0 && row_identity && col_identity) { 709 ierr = MatDuplicate_SeqBAIJ(A,MAT_DO_NOT_COPY_VALUES,fact); CHKERRQ(ierr); 710 (*fact)->factor = FACTOR_LU; 711 b = (Mat_SeqBAIJ *) (*fact)->data; 712 if (!b->diag) { 713 ierr = MatMarkDiag_SeqBAIJ(*fact); CHKERRQ(ierr); 714 } 715 b->row = isrow; 716 b->col = iscol; 717 b->icol = isicol; 718 b->solve_work = (Scalar *) PetscMalloc((b->m+1+b->bs)*sizeof(Scalar));CHKPTRQ(b->solve_work); 719 /* 720 Blocksize 4 and 5 a special faster solver for ILU(0) factorization 721 with natural ordering 722 */ 723 if (b->bs == 4) { 724 (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 725 (*fact)->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 726 PLogInfo(A,"Using special natural ordering factor and solve BS=4\n"); 727 } else if (b->bs == 5) { 728 (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 729 (*fact)->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 730 PLogInfo( A,"Using special natural ordering factor and solve BS=5\n"); 731 } 732 PetscFunctionReturn(0); 733 } 734 735 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 736 ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 737 738 /* get new row pointers */ 739 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew); 740 ainew[0] = 0; 741 /* don't know how many column pointers are needed so estimate */ 742 jmax = (int) (f*ai[n] + 1); 743 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew); 744 /* ajfill is level of fill for each fill entry */ 745 ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajfill); 746 /* fill is a linked list of nonzeros in active row */ 747 fill = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(fill); 748 /* im is level for each filled value */ 749 im = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(im); 750 /* dloc is location of diagonal in factor */ 751 dloc = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(dloc); 752 dloc[0] = 0; 753 for ( prow=0; prow<n; prow++ ) { 754 /* first copy previous fill into linked list */ 755 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 756 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 757 xi = aj + ai[r[prow]]; 758 fill[n] = n; 759 while (nz--) { 760 fm = n; 761 idx = ic[*xi++]; 762 do { 763 m = fm; 764 fm = fill[m]; 765 } while (fm < idx); 766 fill[m] = idx; 767 fill[idx] = fm; 768 im[idx] = 0; 769 } 770 nzi = 0; 771 row = fill[n]; 772 while ( row < prow ) { 773 incrlev = im[row] + 1; 774 nz = dloc[row]; 775 xi = ajnew + ainew[row] + nz; 776 flev = ajfill + ainew[row] + nz + 1; 777 nnz = ainew[row+1] - ainew[row] - nz - 1; 778 if (*xi++ != row) { 779 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot: try running with -pc_ilu_nonzeros_along_diagonal"); 780 } 781 fm = row; 782 while (nnz-- > 0) { 783 idx = *xi++; 784 if (*flev + incrlev > levels) { 785 flev++; 786 continue; 787 } 788 do { 789 m = fm; 790 fm = fill[m]; 791 } while (fm < idx); 792 if (fm != idx) { 793 im[idx] = *flev + incrlev; 794 fill[m] = idx; 795 fill[idx] = fm; 796 fm = idx; 797 nzf++; 798 } else { 799 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 800 } 801 flev++; 802 } 803 row = fill[row]; 804 nzi++; 805 } 806 /* copy new filled row into permanent storage */ 807 ainew[prow+1] = ainew[prow] + nzf; 808 if (ainew[prow+1] > jmax) { 809 810 /* estimate how much additional space we will need */ 811 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 812 /* just double the memory each time */ 813 int maxadd = jmax; 814 /* maxadd = (int) (((f*ai[n]+1)*(n-prow+5))/n); */ 815 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 816 jmax += maxadd; 817 818 /* allocate a longer ajnew and ajfill */ 819 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 820 PetscMemcpy(xi,ajnew,ainew[prow]*sizeof(int)); 821 PetscFree(ajnew); 822 ajnew = xi; 823 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 824 PetscMemcpy(xi,ajfill,ainew[prow]*sizeof(int)); 825 PetscFree(ajfill); 826 ajfill = xi; 827 realloc++; /* count how many reallocations are needed */ 828 } 829 xi = ajnew + ainew[prow]; 830 flev = ajfill + ainew[prow]; 831 dloc[prow] = nzi; 832 fm = fill[n]; 833 while (nzf--) { 834 *xi++ = fm; 835 *flev++ = im[fm]; 836 fm = fill[fm]; 837 } 838 } 839 PetscFree(ajfill); 840 ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 841 ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 842 PetscFree(fill); PetscFree(im); 843 844 { 845 double af = ((double)ainew[n])/((double)ai[n]); 846 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n", 847 realloc,f,af); 848 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Run with -pc_ilu_fill %g or use \n",af); 849 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:PCILUSetFill(pc,%g);\n",af); 850 PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:for best performance.\n"); 851 } 852 853 /* put together the new matrix */ 854 ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);CHKERRQ(ierr); 855 PLogObjectParent(*fact,isicol); 856 b = (Mat_SeqBAIJ *) (*fact)->data; 857 PetscFree(b->imax); 858 b->singlemalloc = 0; 859 len = bs2*ainew[n]*sizeof(MatScalar); 860 /* the next line frees the default space generated by the Create() */ 861 PetscFree(b->a); PetscFree(b->ilen); 862 b->a = (MatScalar *) PetscMalloc( len ); CHKPTRQ(b->a); 863 b->j = ajnew; 864 b->i = ainew; 865 for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 866 b->diag = dloc; 867 b->ilen = 0; 868 b->imax = 0; 869 b->row = isrow; 870 b->col = iscol; 871 b->icol = isicol; 872 b->solve_work = (Scalar *) PetscMalloc( (bs*n+bs)*sizeof(Scalar));CHKPTRQ(b->solve_work); 873 /* In b structure: Free imax, ilen, old a, old j. 874 Allocate dloc, solve_work, new a, new j */ 875 PLogObjectMemory(*fact,(ainew[n]-n)*(sizeof(int))+bs2*ainew[n]*sizeof(Scalar)); 876 b->maxnz = b->nz = ainew[n]; 877 (*fact)->factor = FACTOR_LU; 878 879 (*fact)->info.factor_mallocs = realloc; 880 (*fact)->info.fill_ratio_given = f; 881 (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]); 882 883 PetscFunctionReturn(0); 884 } 885 886 887 888 889