1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for BAIJ format. 5 */ 6 #include "../src/mat/impls/baij/seq/baij.h" 7 #include "../src/mat/blockinvert.h" 8 9 /* ------------------------------------------------------------*/ 10 /* 11 Version for when blocks are 2 by 2 12 */ 13 /* MatLUFactorNumeric_SeqBAIJ_2_newdatastruct - 14 copied from MatLUFactorNumeric_SeqBAIJ_N_newdatastruct() and manually re-implemented 15 Kernel_A_gets_A_times_B() 16 Kernel_A_gets_A_minus_B_times_C() 17 Kernel_A_gets_inverse_A() 18 */ 19 #undef __FUNCT__ 20 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_newdatastruct" 21 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_newdatastruct(Mat B,Mat A,const MatFactorInfo *info) 22 { 23 Mat C=B; 24 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ *)C->data; 25 IS isrow = b->row,isicol = b->icol; 26 PetscErrorCode ierr; 27 const PetscInt *r,*ic,*ics; 28 PetscInt i,j,k,n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 29 PetscInt *ajtmp,*bjtmp,nz,nzL,row,*bdiag=b->diag,*pj; 30 MatScalar *rtmp,*pc,*mwork,*v,*pv,*aa=a->a; 31 PetscInt bs2 = a->bs2,flg; 32 PetscReal shift = info->shiftinblocks; 33 34 PetscFunctionBegin; 35 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 36 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 37 38 /* generate work space needed by the factorization */ 39 ierr = PetscMalloc((bs2*n+bs2+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 40 mwork = rtmp + bs2*n; 41 ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); 42 ics = ic; 43 44 for (i=0; i<n; i++){ 45 /* zero rtmp */ 46 /* L part */ 47 nz = bi[i+1] - bi[i]; 48 bjtmp = bj + bi[i]; 49 for (j=0; j<nz; j++){ 50 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 51 } 52 53 /* U part */ 54 nz = bi[2*n-i+1] - bi[2*n-i]; 55 bjtmp = bj + bi[2*n-i]; 56 for (j=0; j<nz; j++){ 57 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 58 } 59 60 /* load in initial (unfactored row) */ 61 nz = ai[r[i]+1] - ai[r[i]]; 62 ajtmp = aj + ai[r[i]]; 63 v = aa + bs2*ai[r[i]]; 64 for (j=0; j<nz; j++) { 65 ierr = PetscMemcpy(rtmp+bs2*ic[ajtmp[j]],v+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 66 } 67 68 /* elimination */ 69 bjtmp = bj + bi[i]; 70 nzL = bi[i+1] - bi[i]; 71 for(k=0;k < nzL;k++) { 72 row = bjtmp[k]; 73 pc = rtmp + bs2*row; 74 for (flg=0,j=0; j<bs2; j++) { if (pc[j]!=0.0) { flg = 1; break; }} 75 if (flg) { 76 pv = b->a + bs2*bdiag[row]; 77 /* Kernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ 78 ierr = Kernel_A_gets_A_times_B_2(pc,pv,mwork);CHKERRQ(ierr); 79 80 pj = b->j + bi[2*n-row]; /* begining of U(row,:) */ 81 pv = b->a + bs2*bi[2*n-row]; 82 nz = bi[2*n-row+1] - bi[2*n-row] - 1; /* num of entries inU(row,:), excluding diag */ 83 for (j=0; j<nz; j++) { 84 /* Kernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */ 85 /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */ 86 v = rtmp + 4*pj[j]; 87 ierr = Kernel_A_gets_A_minus_B_times_C_2(v,pc,pv);CHKERRQ(ierr); 88 pv += 4; 89 } 90 ierr = PetscLogFlops(16*nz+12);CHKERRQ(ierr); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 91 } 92 } 93 94 /* finished row so stick it into b->a */ 95 /* L part */ 96 pv = b->a + bs2*bi[i] ; 97 pj = b->j + bi[i] ; 98 nz = bi[i+1] - bi[i]; 99 for (j=0; j<nz; j++) { 100 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 101 } 102 103 /* Mark diagonal and invert diagonal for simplier triangular solves */ 104 pv = b->a + bs2*bdiag[i]; 105 pj = b->j + bdiag[i]; 106 ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); 107 /* ierr = Kernel_A_gets_inverse_A(bs,pv,v_pivots,v_work);CHKERRQ(ierr); */ 108 ierr = Kernel_A_gets_inverse_A_2(pv,shift);CHKERRQ(ierr); 109 110 /* U part */ 111 pv = b->a + bs2*bi[2*n-i]; 112 pj = b->j + bi[2*n-i]; 113 nz = bi[2*n-i+1] - bi[2*n-i] - 1; 114 for (j=0; j<nz; j++){ 115 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 116 } 117 } 118 119 ierr = PetscFree(rtmp);CHKERRQ(ierr); 120 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 121 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 122 123 C->assembled = PETSC_TRUE; 124 ierr = PetscLogFlops(1.3333*bs2*n);CHKERRQ(ierr); /* from inverting diagonal blocks */ 125 PetscFunctionReturn(0); 126 } 127 128 #undef __FUNCT__ 129 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_newdatastruct_v2" 130 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_newdatastruct_v2(Mat B,Mat A,const MatFactorInfo *info) 131 { 132 Mat C=B; 133 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ *)C->data; 134 IS isrow = b->row,isicol = b->icol; 135 PetscErrorCode ierr; 136 const PetscInt *r,*ic,*ics; 137 PetscInt i,j,k,n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 138 PetscInt *ajtmp,*bjtmp,nz,nzL,row,*bdiag=b->diag,*pj; 139 MatScalar *rtmp,*pc,*mwork,*v,*pv,*aa=a->a; 140 PetscInt bs2 = a->bs2,flg; 141 PetscReal shift = info->shiftinblocks; 142 143 PetscFunctionBegin; 144 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 145 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 146 147 /* generate work space needed by the factorization */ 148 ierr = PetscMalloc((bs2*n+bs2+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 149 mwork = rtmp + bs2*n; 150 ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); 151 ics = ic; 152 153 for (i=0; i<n; i++){ 154 /* zero rtmp */ 155 /* L part */ 156 nz = bi[i+1] - bi[i]; 157 bjtmp = bj + bi[i]; 158 for (j=0; j<nz; j++){ 159 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 160 } 161 162 /* U part */ 163 nz = bdiag[i] - bdiag[i+1]; 164 bjtmp = bj + bdiag[i+1]+1; 165 for (j=0; j<nz; j++){ 166 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 167 } 168 169 /* load in initial (unfactored row) */ 170 nz = ai[r[i]+1] - ai[r[i]]; 171 ajtmp = aj + ai[r[i]]; 172 v = aa + bs2*ai[r[i]]; 173 for (j=0; j<nz; j++) { 174 ierr = PetscMemcpy(rtmp+bs2*ic[ajtmp[j]],v+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 175 } 176 177 /* elimination */ 178 bjtmp = bj + bi[i]; 179 nzL = bi[i+1] - bi[i]; 180 for(k=0;k < nzL;k++) { 181 row = bjtmp[k]; 182 pc = rtmp + bs2*row; 183 for (flg=0,j=0; j<bs2; j++) { if (pc[j]!=0.0) { flg = 1; break; }} 184 if (flg) { 185 pv = b->a + bs2*bdiag[row]; 186 /* Kernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ 187 ierr = Kernel_A_gets_A_times_B_2(pc,pv,mwork);CHKERRQ(ierr); 188 189 pj = b->j + bdiag[row+1]+1; /* begining of U(row,:) */ 190 pv = b->a + bs2*(bdiag[row+1]+1); 191 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries inU(row,:), excluding diag */ 192 for (j=0; j<nz; j++) { 193 /* Kernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */ 194 /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */ 195 v = rtmp + 4*pj[j]; 196 ierr = Kernel_A_gets_A_minus_B_times_C_2(v,pc,pv);CHKERRQ(ierr); 197 pv += 4; 198 } 199 ierr = PetscLogFlops(16*nz+12);CHKERRQ(ierr); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 200 } 201 } 202 203 /* finished row so stick it into b->a */ 204 /* L part */ 205 pv = b->a + bs2*bi[i] ; 206 pj = b->j + bi[i] ; 207 nz = bi[i+1] - bi[i]; 208 for (j=0; j<nz; j++) { 209 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 210 } 211 212 /* Mark diagonal and invert diagonal for simplier triangular solves */ 213 pv = b->a + bs2*bdiag[i]; 214 pj = b->j + bdiag[i]; 215 ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); 216 /* ierr = Kernel_A_gets_inverse_A(bs,pv,v_pivots,v_work);CHKERRQ(ierr); */ 217 ierr = Kernel_A_gets_inverse_A_2(pv,shift);CHKERRQ(ierr); 218 219 /* U part */ 220 pv = b->a + bs2*(bdiag[i+1]+1); 221 pj = b->j + bdiag[i+1]+1; 222 nz = bdiag[i] - bdiag[i+1] - 1; 223 for (j=0; j<nz; j++){ 224 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 225 } 226 } 227 228 ierr = PetscFree(rtmp);CHKERRQ(ierr); 229 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 230 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 231 232 C->assembled = PETSC_TRUE; 233 ierr = PetscLogFlops(1.3333*bs2*n);CHKERRQ(ierr); /* from inverting diagonal blocks */ 234 PetscFunctionReturn(0); 235 } 236 237 /* 238 MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct - 239 copied from MatLUFactorNumeric_SeqBAIJ_2_newdatastruct(), then remove ordering 240 */ 241 #undef __FUNCT__ 242 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct" 243 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct(Mat B,Mat A,const MatFactorInfo *info) 244 { 245 Mat C=B; 246 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ *)C->data; 247 PetscErrorCode ierr; 248 PetscInt i,j,k,n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 249 PetscInt *ajtmp,*bjtmp,nz,nzL,row,*bdiag=b->diag,*pj; 250 MatScalar *rtmp,*pc,*mwork,*v,*pv,*aa=a->a; 251 PetscInt bs2 = a->bs2,flg; 252 PetscReal shift = info->shiftinblocks; 253 254 PetscFunctionBegin; 255 /* generate work space needed by the factorization */ 256 ierr = PetscMalloc((bs2*n+bs2+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 257 mwork = rtmp + bs2*n; 258 ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); 259 260 for (i=0; i<n; i++){ 261 /* zero rtmp */ 262 /* L part */ 263 nz = bi[i+1] - bi[i]; 264 bjtmp = bj + bi[i]; 265 for (j=0; j<nz; j++){ 266 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 267 } 268 269 /* U part */ 270 nz = bi[2*n-i+1] - bi[2*n-i]; 271 bjtmp = bj + bi[2*n-i]; 272 for (j=0; j<nz; j++){ 273 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 274 } 275 276 /* load in initial (unfactored row) */ 277 nz = ai[i+1] - ai[i]; 278 ajtmp = aj + ai[i]; 279 v = aa + bs2*ai[i]; 280 for (j=0; j<nz; j++) { 281 ierr = PetscMemcpy(rtmp+bs2*ajtmp[j],v+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 282 } 283 284 /* elimination */ 285 bjtmp = bj + bi[i]; 286 nzL = bi[i+1] - bi[i]; 287 for(k=0;k < nzL;k++) { 288 row = bjtmp[k]; 289 pc = rtmp + bs2*row; 290 for (flg=0,j=0; j<bs2; j++) { if (pc[j]!=0.0) { flg = 1; break; }} 291 if (flg) { 292 pv = b->a + bs2*bdiag[row]; 293 /* Kernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ 294 ierr = Kernel_A_gets_A_times_B_2(pc,pv,mwork);CHKERRQ(ierr); 295 296 pj = b->j + bi[2*n-row]; /* begining of U(row,:) */ 297 pv = b->a + bs2*bi[2*n-row]; 298 nz = bi[2*n-row+1] - bi[2*n-row] - 1; /* num of entries inU(row,:), excluding diag */ 299 for (j=0; j<nz; j++) { 300 /* Kernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */ 301 /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */ 302 v = rtmp + 4*pj[j]; 303 ierr = Kernel_A_gets_A_minus_B_times_C_2(v,pc,pv);CHKERRQ(ierr); 304 pv += 4; 305 } 306 ierr = PetscLogFlops(16*nz+12);CHKERRQ(ierr); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 307 } 308 } 309 310 /* finished row so stick it into b->a */ 311 /* L part */ 312 pv = b->a + bs2*bi[i] ; 313 pj = b->j + bi[i] ; 314 nz = bi[i+1] - bi[i]; 315 for (j=0; j<nz; j++) { 316 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 317 } 318 319 /* Mark diagonal and invert diagonal for simplier triangular solves */ 320 pv = b->a + bs2*bdiag[i]; 321 pj = b->j + bdiag[i]; 322 ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); 323 /* ierr = Kernel_A_gets_inverse_A(bs,pv,v_pivots,v_work);CHKERRQ(ierr); */ 324 ierr = Kernel_A_gets_inverse_A_2(pv,shift);CHKERRQ(ierr); 325 326 /* U part */ 327 pv = b->a + bs2*bi[2*n-i]; 328 pj = b->j + bi[2*n-i]; 329 nz = bi[2*n-i+1] - bi[2*n-i] - 1; 330 for (j=0; j<nz; j++){ 331 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 332 } 333 } 334 335 ierr = PetscFree(rtmp);CHKERRQ(ierr); 336 C->assembled = PETSC_TRUE; 337 ierr = PetscLogFlops(1.3333*bs2*n);CHKERRQ(ierr); /* from inverting diagonal blocks */ 338 PetscFunctionReturn(0); 339 } 340 341 #undef __FUNCT__ 342 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct_v2" 343 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct_v2(Mat B,Mat A,const MatFactorInfo *info) 344 { 345 Mat C=B; 346 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ *)C->data; 347 PetscErrorCode ierr; 348 PetscInt i,j,k,n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 349 PetscInt *ajtmp,*bjtmp,nz,nzL,row,*bdiag=b->diag,*pj; 350 MatScalar *rtmp,*pc,*mwork,*v,*pv,*aa=a->a; 351 PetscInt bs2 = a->bs2,flg; 352 PetscReal shift = info->shiftinblocks; 353 354 PetscFunctionBegin; 355 /* generate work space needed by the factorization */ 356 ierr = PetscMalloc((bs2*n+bs2+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 357 mwork = rtmp + bs2*n; 358 ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); 359 360 for (i=0; i<n; i++){ 361 /* zero rtmp */ 362 /* L part */ 363 nz = bi[i+1] - bi[i]; 364 bjtmp = bj + bi[i]; 365 for (j=0; j<nz; j++){ 366 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 367 } 368 369 /* U part */ 370 nz = bdiag[i] - bdiag[i+1]; 371 bjtmp = bj + bdiag[i+1]+1; 372 for (j=0; j<nz; j++){ 373 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 374 } 375 376 /* load in initial (unfactored row) */ 377 nz = ai[i+1] - ai[i]; 378 ajtmp = aj + ai[i]; 379 v = aa + bs2*ai[i]; 380 for (j=0; j<nz; j++) { 381 ierr = PetscMemcpy(rtmp+bs2*ajtmp[j],v+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 382 } 383 384 /* elimination */ 385 bjtmp = bj + bi[i]; 386 nzL = bi[i+1] - bi[i]; 387 for(k=0;k < nzL;k++) { 388 row = bjtmp[k]; 389 pc = rtmp + bs2*row; 390 for (flg=0,j=0; j<bs2; j++) { if (pc[j]!=0.0) { flg = 1; break; }} 391 if (flg) { 392 pv = b->a + bs2*bdiag[row]; 393 /* Kernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ 394 ierr = Kernel_A_gets_A_times_B_2(pc,pv,mwork);CHKERRQ(ierr); 395 396 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 397 pv = b->a + bs2*(bdiag[row+1]+1); 398 nz = bdiag[row]-bdiag[row+1] - 1; /* num of entries in U(row,:) excluding diag */ 399 for (j=0; j<nz; j++) { 400 /* Kernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */ 401 /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */ 402 v = rtmp + 4*pj[j]; 403 ierr = Kernel_A_gets_A_minus_B_times_C_2(v,pc,pv);CHKERRQ(ierr); 404 pv += 4; 405 } 406 ierr = PetscLogFlops(16*nz+12);CHKERRQ(ierr); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 407 } 408 } 409 410 /* finished row so stick it into b->a */ 411 /* L part */ 412 pv = b->a + bs2*bi[i] ; 413 pj = b->j + bi[i] ; 414 nz = bi[i+1] - bi[i]; 415 for (j=0; j<nz; j++) { 416 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 417 } 418 419 /* Mark diagonal and invert diagonal for simplier triangular solves */ 420 pv = b->a + bs2*bdiag[i]; 421 pj = b->j + bdiag[i]; 422 ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); 423 /* ierr = Kernel_A_gets_inverse_A(bs,pv,v_pivots,v_work);CHKERRQ(ierr); */ 424 ierr = Kernel_A_gets_inverse_A_2(pv,shift);CHKERRQ(ierr); 425 426 /* U part */ 427 /* 428 pv = b->a + bs2*bi[2*n-i]; 429 pj = b->j + bi[2*n-i]; 430 nz = bi[2*n-i+1] - bi[2*n-i] - 1; 431 */ 432 pv = b->a + bs2*(bdiag[i+1]+1); 433 pj = b->j + bdiag[i+1]+1; 434 nz = bdiag[i] - bdiag[i+1] - 1; 435 for (j=0; j<nz; j++){ 436 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 437 } 438 } 439 440 ierr = PetscFree(rtmp);CHKERRQ(ierr); 441 C->assembled = PETSC_TRUE; 442 ierr = PetscLogFlops(1.3333*bs2*n);CHKERRQ(ierr); /* from inverting diagonal blocks */ 443 PetscFunctionReturn(0); 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2" 448 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat B,Mat A,const MatFactorInfo *info) 449 { 450 Mat C = B; 451 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 452 IS isrow = b->row,isicol = b->icol; 453 PetscErrorCode ierr; 454 const PetscInt *r,*ic; 455 PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; 456 PetscInt *ajtmpold,*ajtmp,nz,row; 457 PetscInt *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj; 458 MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4; 459 MatScalar p1,p2,p3,p4; 460 MatScalar *ba = b->a,*aa = a->a; 461 PetscReal shift = info->shiftinblocks; 462 463 PetscFunctionBegin; 464 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 465 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 466 ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 467 468 for (i=0; i<n; i++) { 469 nz = bi[i+1] - bi[i]; 470 ajtmp = bj + bi[i]; 471 for (j=0; j<nz; j++) { 472 x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0; 473 } 474 /* load in initial (unfactored row) */ 475 idx = r[i]; 476 nz = ai[idx+1] - ai[idx]; 477 ajtmpold = aj + ai[idx]; 478 v = aa + 4*ai[idx]; 479 for (j=0; j<nz; j++) { 480 x = rtmp+4*ic[ajtmpold[j]]; 481 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 482 v += 4; 483 } 484 row = *ajtmp++; 485 while (row < i) { 486 pc = rtmp + 4*row; 487 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 488 if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 489 pv = ba + 4*diag_offset[row]; 490 pj = bj + diag_offset[row] + 1; 491 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 492 pc[0] = m1 = p1*x1 + p3*x2; 493 pc[1] = m2 = p2*x1 + p4*x2; 494 pc[2] = m3 = p1*x3 + p3*x4; 495 pc[3] = m4 = p2*x3 + p4*x4; 496 nz = bi[row+1] - diag_offset[row] - 1; 497 pv += 4; 498 for (j=0; j<nz; j++) { 499 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 500 x = rtmp + 4*pj[j]; 501 x[0] -= m1*x1 + m3*x2; 502 x[1] -= m2*x1 + m4*x2; 503 x[2] -= m1*x3 + m3*x4; 504 x[3] -= m2*x3 + m4*x4; 505 pv += 4; 506 } 507 ierr = PetscLogFlops(16.0*nz+12.0);CHKERRQ(ierr); 508 } 509 row = *ajtmp++; 510 } 511 /* finished row so stick it into b->a */ 512 pv = ba + 4*bi[i]; 513 pj = bj + bi[i]; 514 nz = bi[i+1] - bi[i]; 515 for (j=0; j<nz; j++) { 516 x = rtmp+4*pj[j]; 517 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 518 pv += 4; 519 } 520 /* invert diagonal block */ 521 w = ba + 4*diag_offset[i]; 522 ierr = Kernel_A_gets_inverse_A_2(w,shift);CHKERRQ(ierr); 523 } 524 525 ierr = PetscFree(rtmp);CHKERRQ(ierr); 526 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 527 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 528 C->ops->solve = MatSolve_SeqBAIJ_2; 529 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2; 530 C->assembled = PETSC_TRUE; 531 ierr = PetscLogFlops(1.3333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 532 PetscFunctionReturn(0); 533 } 534 /* 535 Version for when blocks are 2 by 2 Using natural ordering 536 */ 537 #undef __FUNCT__ 538 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering" 539 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info) 540 { 541 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 542 PetscErrorCode ierr; 543 PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; 544 PetscInt *ajtmpold,*ajtmp,nz,row; 545 PetscInt *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; 546 MatScalar *pv,*v,*rtmp,*pc,*w,*x; 547 MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4; 548 MatScalar *ba = b->a,*aa = a->a; 549 PetscReal shift = info->shiftinblocks; 550 551 PetscFunctionBegin; 552 ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 553 for (i=0; i<n; i++) { 554 nz = bi[i+1] - bi[i]; 555 ajtmp = bj + bi[i]; 556 for (j=0; j<nz; j++) { 557 x = rtmp+4*ajtmp[j]; 558 x[0] = x[1] = x[2] = x[3] = 0.0; 559 } 560 /* load in initial (unfactored row) */ 561 nz = ai[i+1] - ai[i]; 562 ajtmpold = aj + ai[i]; 563 v = aa + 4*ai[i]; 564 for (j=0; j<nz; j++) { 565 x = rtmp+4*ajtmpold[j]; 566 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 567 v += 4; 568 } 569 row = *ajtmp++; 570 while (row < i) { 571 pc = rtmp + 4*row; 572 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 573 if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 574 pv = ba + 4*diag_offset[row]; 575 pj = bj + diag_offset[row] + 1; 576 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 577 pc[0] = m1 = p1*x1 + p3*x2; 578 pc[1] = m2 = p2*x1 + p4*x2; 579 pc[2] = m3 = p1*x3 + p3*x4; 580 pc[3] = m4 = p2*x3 + p4*x4; 581 nz = bi[row+1] - diag_offset[row] - 1; 582 pv += 4; 583 for (j=0; j<nz; j++) { 584 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 585 x = rtmp + 4*pj[j]; 586 x[0] -= m1*x1 + m3*x2; 587 x[1] -= m2*x1 + m4*x2; 588 x[2] -= m1*x3 + m3*x4; 589 x[3] -= m2*x3 + m4*x4; 590 pv += 4; 591 } 592 ierr = PetscLogFlops(16.0*nz+12.0);CHKERRQ(ierr); 593 } 594 row = *ajtmp++; 595 } 596 /* finished row so stick it into b->a */ 597 pv = ba + 4*bi[i]; 598 pj = bj + bi[i]; 599 nz = bi[i+1] - bi[i]; 600 for (j=0; j<nz; j++) { 601 x = rtmp+4*pj[j]; 602 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 603 /* 604 printf(" col %d:",pj[j]); 605 PetscInt j1; 606 for (j1=0; j1<4; j1++) printf(" %g,",*(pv+j1)); 607 printf("\n"); 608 */ 609 pv += 4; 610 } 611 /* invert diagonal block */ 612 w = ba + 4*diag_offset[i]; 613 /* 614 printf(" \n%d -th: diag: ",i); 615 for (j=0; j<4; j++){ 616 printf(" %g,",w[j]); 617 } 618 printf("\n----------------------------\n"); 619 */ 620 ierr = Kernel_A_gets_inverse_A_2(w,shift);CHKERRQ(ierr); 621 } 622 623 ierr = PetscFree(rtmp);CHKERRQ(ierr); 624 C->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 625 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering; 626 C->assembled = PETSC_TRUE; 627 ierr = PetscLogFlops(1.3333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 628 PetscFunctionReturn(0); 629 } 630 631 /* ----------------------------------------------------------- */ 632 /* 633 Version for when blocks are 1 by 1. 634 */ 635 #undef __FUNCT__ 636 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1" 637 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat C,Mat A,const MatFactorInfo *info) 638 { 639 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 640 IS isrow = b->row,isicol = b->icol; 641 PetscErrorCode ierr; 642 const PetscInt *r,*ic; 643 PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; 644 PetscInt *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j; 645 PetscInt *diag_offset = b->diag,diag,*pj; 646 MatScalar *pv,*v,*rtmp,multiplier,*pc; 647 MatScalar *ba = b->a,*aa = a->a; 648 PetscTruth row_identity, col_identity; 649 650 PetscFunctionBegin; 651 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 652 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 653 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 654 655 for (i=0; i<n; i++) { 656 nz = bi[i+1] - bi[i]; 657 ajtmp = bj + bi[i]; 658 for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0; 659 660 /* load in initial (unfactored row) */ 661 nz = ai[r[i]+1] - ai[r[i]]; 662 ajtmpold = aj + ai[r[i]]; 663 v = aa + ai[r[i]]; 664 for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j]; 665 666 row = *ajtmp++; 667 while (row < i) { 668 pc = rtmp + row; 669 if (*pc != 0.0) { 670 pv = ba + diag_offset[row]; 671 pj = bj + diag_offset[row] + 1; 672 multiplier = *pc * *pv++; 673 *pc = multiplier; 674 nz = bi[row+1] - diag_offset[row] - 1; 675 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 676 ierr = PetscLogFlops(1.0+2.0*nz);CHKERRQ(ierr); 677 } 678 row = *ajtmp++; 679 } 680 /* finished row so stick it into b->a */ 681 pv = ba + bi[i]; 682 pj = bj + bi[i]; 683 nz = bi[i+1] - bi[i]; 684 for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];} 685 diag = diag_offset[i] - bi[i]; 686 /* check pivot entry for current row */ 687 if (pv[diag] == 0.0) { 688 SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot: row in original ordering %D in permuted ordering %D",r[i],i); 689 } 690 pv[diag] = 1.0/pv[diag]; 691 } 692 693 ierr = PetscFree(rtmp);CHKERRQ(ierr); 694 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 695 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 696 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 697 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 698 if (row_identity && col_identity) { 699 C->ops->solve = MatSolve_SeqBAIJ_1_NaturalOrdering; 700 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering; 701 } else { 702 C->ops->solve = MatSolve_SeqBAIJ_1; 703 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1; 704 } 705 C->assembled = PETSC_TRUE; 706 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 710 EXTERN_C_BEGIN 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatGetFactor_seqbaij_petsc" 713 PetscErrorCode MatGetFactor_seqbaij_petsc(Mat A,MatFactorType ftype,Mat *B) 714 { 715 PetscInt n = A->rmap->n; 716 PetscErrorCode ierr; 717 718 PetscFunctionBegin; 719 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 720 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 721 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 722 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 723 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqBAIJ; 724 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqBAIJ; 725 (*B)->ops->iludtfactor = MatILUDTFactor_SeqBAIJ; 726 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 727 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 728 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 729 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqBAIJ; 730 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqBAIJ; 731 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 732 (*B)->factor = ftype; 733 PetscFunctionReturn(0); 734 } 735 EXTERN_C_END 736 737 EXTERN_C_BEGIN 738 #undef __FUNCT__ 739 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 740 PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 741 { 742 PetscFunctionBegin; 743 *flg = PETSC_TRUE; 744 PetscFunctionReturn(0); 745 } 746 EXTERN_C_END 747 748 /* ----------------------------------------------------------- */ 749 #undef __FUNCT__ 750 #define __FUNCT__ "MatLUFactor_SeqBAIJ" 751 PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 752 { 753 PetscErrorCode ierr; 754 Mat C; 755 756 PetscFunctionBegin; 757 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 758 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 759 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 760 A->ops->solve = C->ops->solve; 761 A->ops->solvetranspose = C->ops->solvetranspose; 762 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 763 ierr = PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);CHKERRQ(ierr); 764 PetscFunctionReturn(0); 765 } 766 767 #include "../src/mat/impls/sbaij/seq/sbaij.h" 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N" 770 PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat C,Mat A,const MatFactorInfo *info) 771 { 772 PetscErrorCode ierr; 773 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 774 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 775 IS ip=b->row; 776 const PetscInt *rip; 777 PetscInt i,j,mbs=a->mbs,bs=A->rmap->bs,*bi=b->i,*bj=b->j,*bcol; 778 PetscInt *ai=a->i,*aj=a->j; 779 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 780 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 781 PetscReal zeropivot,rs,shiftnz; 782 PetscReal shiftpd; 783 ChShift_Ctx sctx; 784 PetscInt newshift; 785 786 PetscFunctionBegin; 787 if (bs > 1) { 788 if (!a->sbaijMat){ 789 ierr = MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);CHKERRQ(ierr); 790 } 791 ierr = (a->sbaijMat)->ops->choleskyfactornumeric(C,a->sbaijMat,info);CHKERRQ(ierr); 792 ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 793 a->sbaijMat = PETSC_NULL; 794 PetscFunctionReturn(0); 795 } 796 797 /* initialization */ 798 shiftnz = info->shiftnz; 799 shiftpd = info->shiftpd; 800 zeropivot = info->zeropivot; 801 802 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 803 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 804 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 805 jl = il + mbs; 806 rtmp = (MatScalar*)(jl + mbs); 807 808 sctx.shift_amount = 0; 809 sctx.nshift = 0; 810 do { 811 sctx.chshift = PETSC_FALSE; 812 for (i=0; i<mbs; i++) { 813 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 814 } 815 816 for (k = 0; k<mbs; k++){ 817 bval = ba + bi[k]; 818 /* initialize k-th row by the perm[k]-th row of A */ 819 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 820 for (j = jmin; j < jmax; j++){ 821 col = rip[aj[j]]; 822 if (col >= k){ /* only take upper triangular entry */ 823 rtmp[col] = aa[j]; 824 *bval++ = 0.0; /* for in-place factorization */ 825 } 826 } 827 828 /* shift the diagonal of the matrix */ 829 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 830 831 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 832 dk = rtmp[k]; 833 i = jl[k]; /* first row to be added to k_th row */ 834 835 while (i < k){ 836 nexti = jl[i]; /* next row to be added to k_th row */ 837 838 /* compute multiplier, update diag(k) and U(i,k) */ 839 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 840 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 841 dk += uikdi*ba[ili]; 842 ba[ili] = uikdi; /* -U(i,k) */ 843 844 /* add multiple of row i to k-th row */ 845 jmin = ili + 1; jmax = bi[i+1]; 846 if (jmin < jmax){ 847 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 848 /* update il and jl for row i */ 849 il[i] = jmin; 850 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 851 } 852 i = nexti; 853 } 854 855 /* shift the diagonals when zero pivot is detected */ 856 /* compute rs=sum of abs(off-diagonal) */ 857 rs = 0.0; 858 jmin = bi[k]+1; 859 nz = bi[k+1] - jmin; 860 if (nz){ 861 bcol = bj + jmin; 862 while (nz--){ 863 rs += PetscAbsScalar(rtmp[*bcol]); 864 bcol++; 865 } 866 } 867 868 sctx.rs = rs; 869 sctx.pv = dk; 870 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 871 if (newshift == 1) break; 872 873 /* copy data into U(k,:) */ 874 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 875 jmin = bi[k]+1; jmax = bi[k+1]; 876 if (jmin < jmax) { 877 for (j=jmin; j<jmax; j++){ 878 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 879 } 880 /* add the k-th row into il and jl */ 881 il[k] = jmin; 882 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 883 } 884 } 885 } while (sctx.chshift); 886 ierr = PetscFree(il);CHKERRQ(ierr); 887 888 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 889 C->assembled = PETSC_TRUE; 890 C->preallocated = PETSC_TRUE; 891 ierr = PetscLogFlops(C->rmap->N);CHKERRQ(ierr); 892 if (sctx.nshift){ 893 if (shiftpd) { 894 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 895 } else if (shiftnz) { 896 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 897 } 898 } 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering" 904 PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info) 905 { 906 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 907 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 908 PetscErrorCode ierr; 909 PetscInt i,j,am=a->mbs; 910 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 911 PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz; 912 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 913 PetscReal zeropivot,rs,shiftnz; 914 PetscReal shiftpd; 915 ChShift_Ctx sctx; 916 PetscInt newshift; 917 918 PetscFunctionBegin; 919 /* initialization */ 920 shiftnz = info->shiftnz; 921 shiftpd = info->shiftpd; 922 zeropivot = info->zeropivot; 923 924 nz = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar); 925 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 926 jl = il + am; 927 rtmp = (MatScalar*)(jl + am); 928 929 sctx.shift_amount = 0; 930 sctx.nshift = 0; 931 do { 932 sctx.chshift = PETSC_FALSE; 933 for (i=0; i<am; i++) { 934 rtmp[i] = 0.0; jl[i] = am; il[0] = 0; 935 } 936 937 for (k = 0; k<am; k++){ 938 /* initialize k-th row with elements nonzero in row perm(k) of A */ 939 nz = ai[k+1] - ai[k]; 940 acol = aj + ai[k]; 941 aval = aa + ai[k]; 942 bval = ba + bi[k]; 943 while (nz -- ){ 944 if (*acol < k) { /* skip lower triangular entries */ 945 acol++; aval++; 946 } else { 947 rtmp[*acol++] = *aval++; 948 *bval++ = 0.0; /* for in-place factorization */ 949 } 950 } 951 952 /* shift the diagonal of the matrix */ 953 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 954 955 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 956 dk = rtmp[k]; 957 i = jl[k]; /* first row to be added to k_th row */ 958 959 while (i < k){ 960 nexti = jl[i]; /* next row to be added to k_th row */ 961 /* compute multiplier, update D(k) and U(i,k) */ 962 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 963 uikdi = - ba[ili]*ba[bi[i]]; 964 dk += uikdi*ba[ili]; 965 ba[ili] = uikdi; /* -U(i,k) */ 966 967 /* add multiple of row i to k-th row ... */ 968 jmin = ili + 1; 969 nz = bi[i+1] - jmin; 970 if (nz > 0){ 971 bcol = bj + jmin; 972 bval = ba + jmin; 973 while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 974 /* update il and jl for i-th row */ 975 il[i] = jmin; 976 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 977 } 978 i = nexti; 979 } 980 981 /* shift the diagonals when zero pivot is detected */ 982 /* compute rs=sum of abs(off-diagonal) */ 983 rs = 0.0; 984 jmin = bi[k]+1; 985 nz = bi[k+1] - jmin; 986 if (nz){ 987 bcol = bj + jmin; 988 while (nz--){ 989 rs += PetscAbsScalar(rtmp[*bcol]); 990 bcol++; 991 } 992 } 993 994 sctx.rs = rs; 995 sctx.pv = dk; 996 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 997 if (newshift == 1) break; /* sctx.shift_amount is updated */ 998 999 /* copy data into U(k,:) */ 1000 ba[bi[k]] = 1.0/dk; 1001 jmin = bi[k]+1; 1002 nz = bi[k+1] - jmin; 1003 if (nz){ 1004 bcol = bj + jmin; 1005 bval = ba + jmin; 1006 while (nz--){ 1007 *bval++ = rtmp[*bcol]; 1008 rtmp[*bcol++] = 0.0; 1009 } 1010 /* add k-th row into il and jl */ 1011 il[k] = jmin; 1012 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1013 } 1014 } 1015 } while (sctx.chshift); 1016 ierr = PetscFree(il);CHKERRQ(ierr); 1017 1018 C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1019 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1020 C->assembled = PETSC_TRUE; 1021 C->preallocated = PETSC_TRUE; 1022 ierr = PetscLogFlops(C->rmap->N);CHKERRQ(ierr); 1023 if (sctx.nshift){ 1024 if (shiftnz) { 1025 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1026 } else if (shiftpd) { 1027 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1028 } 1029 } 1030 PetscFunctionReturn(0); 1031 } 1032 1033 #include "petscbt.h" 1034 #include "../src/mat/utils/freespace.h" 1035 #undef __FUNCT__ 1036 #define __FUNCT__ "MatICCFactorSymbolic_SeqBAIJ" 1037 PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1038 { 1039 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1040 Mat_SeqSBAIJ *b; 1041 Mat B; 1042 PetscErrorCode ierr; 1043 PetscTruth perm_identity; 1044 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=a->mbs,bs=A->rmap->bs,*ui; 1045 const PetscInt *rip; 1046 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1047 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1048 PetscReal fill=info->fill,levels=info->levels; 1049 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1050 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1051 PetscBT lnkbt; 1052 1053 PetscFunctionBegin; 1054 if (bs > 1){ 1055 if (!a->sbaijMat){ 1056 ierr = MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);CHKERRQ(ierr); 1057 } 1058 (fact)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */ 1059 ierr = MatICCFactorSymbolic(fact,a->sbaijMat,perm,info);CHKERRQ(ierr); 1060 PetscFunctionReturn(0); 1061 } 1062 1063 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1064 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1065 1066 /* special case that simply copies fill pattern */ 1067 if (!levels && perm_identity) { 1068 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1069 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1070 for (i=0; i<am; i++) { 1071 ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */ 1072 } 1073 B = fact; 1074 ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr); 1075 1076 1077 b = (Mat_SeqSBAIJ*)B->data; 1078 uj = b->j; 1079 for (i=0; i<am; i++) { 1080 aj = a->j + a->diag[i]; 1081 for (j=0; j<ui[i]; j++){ 1082 *uj++ = *aj++; 1083 } 1084 b->ilen[i] = ui[i]; 1085 } 1086 ierr = PetscFree(ui);CHKERRQ(ierr); 1087 B->factor = MAT_FACTOR_NONE; 1088 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1089 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1090 B->factor = MAT_FACTOR_ICC; 1091 1092 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 1093 PetscFunctionReturn(0); 1094 } 1095 1096 /* initialization */ 1097 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1098 ui[0] = 0; 1099 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 1100 1101 /* jl: linked list for storing indices of the pivot rows 1102 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1103 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1104 il = jl + am; 1105 uj_ptr = (PetscInt**)(il + am); 1106 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1107 for (i=0; i<am; i++){ 1108 jl[i] = am; il[i] = 0; 1109 } 1110 1111 /* create and initialize a linked list for storing column indices of the active row k */ 1112 nlnk = am + 1; 1113 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1114 1115 /* initial FreeSpace size is fill*(ai[am]+1) */ 1116 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1117 current_space = free_space; 1118 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1119 current_space_lvl = free_space_lvl; 1120 1121 for (k=0; k<am; k++){ /* for each active row k */ 1122 /* initialize lnk by the column indices of row rip[k] of A */ 1123 nzk = 0; 1124 ncols = ai[rip[k]+1] - ai[rip[k]]; 1125 ncols_upper = 0; 1126 cols = cols_lvl + am; 1127 for (j=0; j<ncols; j++){ 1128 i = rip[*(aj + ai[rip[k]] + j)]; 1129 if (i >= k){ /* only take upper triangular entry */ 1130 cols[ncols_upper] = i; 1131 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 1132 ncols_upper++; 1133 } 1134 } 1135 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1136 nzk += nlnk; 1137 1138 /* update lnk by computing fill-in for each pivot row to be merged in */ 1139 prow = jl[k]; /* 1st pivot row */ 1140 1141 while (prow < k){ 1142 nextprow = jl[prow]; 1143 1144 /* merge prow into k-th row */ 1145 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1146 jmax = ui[prow+1]; 1147 ncols = jmax-jmin; 1148 i = jmin - ui[prow]; 1149 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1150 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1151 ierr = PetscIncompleteLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1152 nzk += nlnk; 1153 1154 /* update il and jl for prow */ 1155 if (jmin < jmax){ 1156 il[prow] = jmin; 1157 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1158 } 1159 prow = nextprow; 1160 } 1161 1162 /* if free space is not available, make more free space */ 1163 if (current_space->local_remaining<nzk) { 1164 i = am - k + 1; /* num of unfactored rows */ 1165 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1166 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1167 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1168 reallocs++; 1169 } 1170 1171 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1172 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1173 1174 /* add the k-th row into il and jl */ 1175 if (nzk-1 > 0){ 1176 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1177 jl[k] = jl[i]; jl[i] = k; 1178 il[k] = ui[k] + 1; 1179 } 1180 uj_ptr[k] = current_space->array; 1181 uj_lvl_ptr[k] = current_space_lvl->array; 1182 1183 current_space->array += nzk; 1184 current_space->local_used += nzk; 1185 current_space->local_remaining -= nzk; 1186 1187 current_space_lvl->array += nzk; 1188 current_space_lvl->local_used += nzk; 1189 current_space_lvl->local_remaining -= nzk; 1190 1191 ui[k+1] = ui[k] + nzk; 1192 } 1193 1194 #if defined(PETSC_USE_INFO) 1195 if (ai[am] != 0) { 1196 PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]); 1197 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1198 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1199 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1200 } else { 1201 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1202 } 1203 #endif 1204 1205 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1206 ierr = PetscFree(jl);CHKERRQ(ierr); 1207 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1208 1209 /* destroy list of free space and other temporary array(s) */ 1210 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1211 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1212 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1213 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1214 1215 /* put together the new matrix in MATSEQSBAIJ format */ 1216 B = fact; 1217 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1218 1219 b = (Mat_SeqSBAIJ*)B->data; 1220 b->singlemalloc = PETSC_FALSE; 1221 b->free_a = PETSC_TRUE; 1222 b->free_ij = PETSC_TRUE; 1223 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1224 b->j = uj; 1225 b->i = ui; 1226 b->diag = 0; 1227 b->ilen = 0; 1228 b->imax = 0; 1229 b->row = perm; 1230 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1231 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1232 b->icol = perm; 1233 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1234 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1235 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1236 b->maxnz = b->nz = ui[am]; 1237 1238 B->info.factor_mallocs = reallocs; 1239 B->info.fill_ratio_given = fill; 1240 if (ai[am] != 0) { 1241 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1242 } else { 1243 B->info.fill_ratio_needed = 0.0; 1244 } 1245 if (perm_identity){ 1246 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1247 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1248 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 1249 } else { 1250 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 1251 } 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqBAIJ" 1257 PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1258 { 1259 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1260 Mat_SeqSBAIJ *b; 1261 Mat B; 1262 PetscErrorCode ierr; 1263 PetscTruth perm_identity; 1264 PetscReal fill = info->fill; 1265 const PetscInt *rip; 1266 PetscInt i,mbs=a->mbs,bs=A->rmap->bs,*ai=a->i,*aj=a->j,reallocs=0,prow; 1267 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1268 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1269 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1270 PetscBT lnkbt; 1271 1272 PetscFunctionBegin; 1273 if (bs > 1) { /* convert to seqsbaij */ 1274 if (!a->sbaijMat){ 1275 ierr = MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);CHKERRQ(ierr); 1276 } 1277 (fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */ 1278 ierr = MatCholeskyFactorSymbolic(fact,a->sbaijMat,perm,info);CHKERRQ(ierr); 1279 PetscFunctionReturn(0); 1280 } 1281 1282 /* check whether perm is the identity mapping */ 1283 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1284 if (!perm_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 1285 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1286 1287 /* initialization */ 1288 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1289 ui[0] = 0; 1290 1291 /* jl: linked list for storing indices of the pivot rows 1292 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 1293 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1294 il = jl + mbs; 1295 cols = il + mbs; 1296 ui_ptr = (PetscInt**)(cols + mbs); 1297 for (i=0; i<mbs; i++){ 1298 jl[i] = mbs; il[i] = 0; 1299 } 1300 1301 /* create and initialize a linked list for storing column indices of the active row k */ 1302 nlnk = mbs + 1; 1303 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1304 1305 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 1306 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 1307 current_space = free_space; 1308 1309 for (k=0; k<mbs; k++){ /* for each active row k */ 1310 /* initialize lnk by the column indices of row rip[k] of A */ 1311 nzk = 0; 1312 ncols = ai[rip[k]+1] - ai[rip[k]]; 1313 ncols_upper = 0; 1314 for (j=0; j<ncols; j++){ 1315 i = rip[*(aj + ai[rip[k]] + j)]; 1316 if (i >= k){ /* only take upper triangular entry */ 1317 cols[ncols_upper] = i; 1318 ncols_upper++; 1319 } 1320 } 1321 ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1322 nzk += nlnk; 1323 1324 /* update lnk by computing fill-in for each pivot row to be merged in */ 1325 prow = jl[k]; /* 1st pivot row */ 1326 1327 while (prow < k){ 1328 nextprow = jl[prow]; 1329 /* merge prow into k-th row */ 1330 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 1331 jmax = ui[prow+1]; 1332 ncols = jmax-jmin; 1333 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 1334 ierr = PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1335 nzk += nlnk; 1336 1337 /* update il and jl for prow */ 1338 if (jmin < jmax){ 1339 il[prow] = jmin; 1340 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1341 } 1342 prow = nextprow; 1343 } 1344 1345 /* if free space is not available, make more free space */ 1346 if (current_space->local_remaining<nzk) { 1347 i = mbs - k + 1; /* num of unfactored rows */ 1348 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1349 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1350 reallocs++; 1351 } 1352 1353 /* copy data into free space, then initialize lnk */ 1354 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1355 1356 /* add the k-th row into il and jl */ 1357 if (nzk-1 > 0){ 1358 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1359 jl[k] = jl[i]; jl[i] = k; 1360 il[k] = ui[k] + 1; 1361 } 1362 ui_ptr[k] = current_space->array; 1363 current_space->array += nzk; 1364 current_space->local_used += nzk; 1365 current_space->local_remaining -= nzk; 1366 1367 ui[k+1] = ui[k] + nzk; 1368 } 1369 1370 #if defined(PETSC_USE_INFO) 1371 if (ai[mbs] != 0) { 1372 PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 1373 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1374 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1375 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1376 } else { 1377 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1378 } 1379 #endif 1380 1381 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1382 ierr = PetscFree(jl);CHKERRQ(ierr); 1383 1384 /* destroy list of free space and other temporary array(s) */ 1385 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1386 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1387 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1388 1389 /* put together the new matrix in MATSEQSBAIJ format */ 1390 B = fact; 1391 ierr = MatSeqSBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1392 1393 b = (Mat_SeqSBAIJ*)B->data; 1394 b->singlemalloc = PETSC_FALSE; 1395 b->free_a = PETSC_TRUE; 1396 b->free_ij = PETSC_TRUE; 1397 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1398 b->j = uj; 1399 b->i = ui; 1400 b->diag = 0; 1401 b->ilen = 0; 1402 b->imax = 0; 1403 b->row = perm; 1404 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1405 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1406 b->icol = perm; 1407 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1408 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1409 ierr = PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1410 b->maxnz = b->nz = ui[mbs]; 1411 1412 B->info.factor_mallocs = reallocs; 1413 B->info.fill_ratio_given = fill; 1414 if (ai[mbs] != 0) { 1415 B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 1416 } else { 1417 B->info.fill_ratio_needed = 0.0; 1418 } 1419 if (perm_identity){ 1420 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 1421 } else { 1422 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 1423 } 1424 PetscFunctionReturn(0); 1425 } 1426 1427 /* --------------------------------------------------------- */ 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct" 1430 PetscErrorCode MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct(Mat A,Vec bb,Vec xx) 1431 { 1432 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1433 PetscErrorCode ierr; 1434 const PetscInt *ai=a->i,*aj=a->j,*vi; 1435 PetscInt i,k,n=a->mbs; 1436 PetscInt nz,bs=A->rmap->bs,bs2=a->bs2,*adiag=a->diag; 1437 MatScalar *aa=a->a,*v; 1438 PetscScalar *x,*b,*s,*t,*ls; 1439 1440 PetscFunctionBegin; 1441 /* printf("MatSolve_SeqBAIJ_NaturalOrdering_iludt..bs %d\n",bs); */ 1442 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1443 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1444 t = a->solve_work; 1445 1446 /* forward solve the lower triangular */ 1447 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* copy 1st block of b to t */ 1448 1449 for (i=1; i<n; i++) { 1450 v = aa + bs2*ai[i]; 1451 vi = aj + ai[i]; 1452 nz = ai[i+1] - ai[i]; 1453 s = t + bs*i; 1454 ierr = PetscMemcpy(s,b+bs*i,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* copy i_th block of b to t */ 1455 for(k=0;k<nz;k++){ 1456 Kernel_v_gets_v_minus_A_times_w(bs,s,v,t+bs*vi[k]); 1457 v += bs2; 1458 } 1459 } 1460 1461 /* backward solve the upper triangular */ 1462 ls = a->solve_work + A->cmap->n; 1463 for (i=n-1; i>=0; i--){ 1464 v = aa + bs2*ai[2*n-i]; 1465 vi = aj + ai[2*n-i]; 1466 nz = ai[2*n-i +1] - ai[2*n-i]-1; 1467 ierr = PetscMemcpy(ls,t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1468 for(k=0;k<nz;k++){ 1469 Kernel_v_gets_v_minus_A_times_w(bs,ls,v,t+bs*vi[k]); 1470 v += bs2; 1471 } 1472 Kernel_w_gets_A_times_v(bs,ls,aa+bs2*adiag[i],t+i*bs); /* *inv(diagonal[i]) */ 1473 ierr = PetscMemcpy(x+i*bs,t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1474 } 1475 1476 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1477 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1478 ierr = PetscLogFlops(2.0*(a->bs2)*(a->nz) - A->rmap->bs*A->cmap->n);CHKERRQ(ierr); 1479 PetscFunctionReturn(0); 1480 } 1481 1482 #undef __FUNCT__ 1483 #define __FUNCT__ "MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct_v2" 1484 PetscErrorCode MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct_v2(Mat A,Vec bb,Vec xx) 1485 { 1486 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1487 PetscErrorCode ierr; 1488 const PetscInt *ai=a->i,*aj=a->j,*adiag=a->diag,*vi; 1489 PetscInt i,k,n=a->mbs; 1490 PetscInt nz,bs=A->rmap->bs,bs2=a->bs2; 1491 MatScalar *aa=a->a,*v; 1492 PetscScalar *x,*b,*s,*t,*ls; 1493 1494 PetscFunctionBegin; 1495 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1496 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1497 t = a->solve_work; 1498 1499 /* forward solve the lower triangular */ 1500 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* copy 1st block of b to t */ 1501 1502 for (i=1; i<n; i++) { 1503 v = aa + bs2*ai[i]; 1504 vi = aj + ai[i]; 1505 nz = ai[i+1] - ai[i]; 1506 s = t + bs*i; 1507 ierr = PetscMemcpy(s,b+bs*i,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* copy i_th block of b to t */ 1508 for(k=0;k<nz;k++){ 1509 Kernel_v_gets_v_minus_A_times_w(bs,s,v,t+bs*vi[k]); 1510 v += bs2; 1511 } 1512 } 1513 1514 /* backward solve the upper triangular */ 1515 ls = a->solve_work + A->cmap->n; 1516 for (i=n-1; i>=0; i--){ 1517 v = aa + bs2*(adiag[i+1]+1); 1518 vi = aj + adiag[i+1]+1; 1519 nz = adiag[i] - adiag[i+1]-1; 1520 ierr = PetscMemcpy(ls,t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1521 for(k=0;k<nz;k++){ 1522 Kernel_v_gets_v_minus_A_times_w(bs,ls,v,t+bs*vi[k]); 1523 v += bs2; 1524 } 1525 Kernel_w_gets_A_times_v(bs,ls,aa+bs2*adiag[i],t+i*bs); /* *inv(diagonal[i]) */ 1526 ierr = PetscMemcpy(x+i*bs,t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1527 } 1528 1529 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1530 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1531 ierr = PetscLogFlops(2.0*(a->bs2)*(a->nz) - A->rmap->bs*A->cmap->n);CHKERRQ(ierr); 1532 PetscFunctionReturn(0); 1533 } 1534 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "MatSolve_SeqBAIJ_N_newdatastruct" 1537 PetscErrorCode MatSolve_SeqBAIJ_N_newdatastruct(Mat A,Vec bb,Vec xx) 1538 { 1539 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1540 IS iscol=a->col,isrow=a->row; 1541 PetscErrorCode ierr; 1542 const PetscInt *r,*c,*rout,*cout,*ai=a->i,*aj=a->j,*vi; 1543 PetscInt i,m,n=a->mbs; 1544 PetscInt nz,bs=A->rmap->bs,bs2=a->bs2,k; 1545 MatScalar *aa=a->a,*v; 1546 PetscScalar *x,*b,*s,*t,*ls; 1547 1548 PetscFunctionBegin; 1549 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1550 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1551 t = a->solve_work; 1552 1553 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1554 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1555 1556 /* forward solve the lower triangular */ 1557 ierr = PetscMemcpy(t,b+bs*r[0],bs*sizeof(PetscScalar));CHKERRQ(ierr); 1558 for (i=1; i<n; i++) { 1559 v = aa + bs2*ai[i]; 1560 vi = aj + ai[i]; 1561 nz = ai[i+1] - ai[i]; 1562 s = t + bs*i; 1563 ierr = PetscMemcpy(s,b+bs*r[i],bs*sizeof(PetscScalar));CHKERRQ(ierr); 1564 for(m=0;m<nz;m++){ 1565 Kernel_v_gets_v_minus_A_times_w(bs,s,v,t+bs*vi[m]); 1566 v += bs2; 1567 } 1568 } 1569 1570 /* backward solve the upper triangular */ 1571 ls = a->solve_work + A->cmap->n; 1572 for (i=n-1; i>=0; i--){ 1573 k = 2*n-i; 1574 v = aa + bs2*ai[k]; 1575 vi = aj + ai[k]; 1576 nz = ai[k+1] - ai[k] - 1; 1577 ierr = PetscMemcpy(ls,t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1578 for(m=0;m<nz;m++){ 1579 Kernel_v_gets_v_minus_A_times_w(bs,ls,v,t+bs*vi[m]); 1580 v += bs2; 1581 } 1582 Kernel_w_gets_A_times_v(bs,ls,v,t+i*bs); /* *inv(diagonal[i]) */ 1583 ierr = PetscMemcpy(x + bs*c[i],t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1584 } 1585 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1586 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1587 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1588 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1589 ierr = PetscLogFlops(2.0*(a->bs2)*(a->nz) - A->rmap->bs*A->cmap->n);CHKERRQ(ierr); 1590 PetscFunctionReturn(0); 1591 } 1592 1593 #undef __FUNCT__ 1594 #define __FUNCT__ "BlockAbs_privat" 1595 PetscErrorCode BlockAbs_private(PetscInt nbs,PetscInt bs2,PetscScalar *blockarray,PetscReal *absarray) 1596 { 1597 PetscErrorCode ierr; 1598 PetscInt i,j; 1599 PetscFunctionBegin; 1600 ierr = PetscMemzero(absarray,(nbs+1)*sizeof(PetscReal));CHKERRQ(ierr); 1601 for (i=0; i<nbs; i++){ 1602 for (j=0; j<bs2; j++){ 1603 if (absarray[i] < PetscAbsScalar(blockarray[i*nbs+j])) absarray[i] = PetscAbsScalar(blockarray[i*nbs+j]); 1604 } 1605 } 1606 PetscFunctionReturn(0); 1607 } 1608 1609 #undef __FUNCT__ 1610 #define __FUNCT__ "MatILUDTFactor_SeqBAIJ" 1611 PetscErrorCode MatILUDTFactor_SeqBAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 1612 { 1613 Mat B = *fact; 1614 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b; 1615 IS isicol; 1616 PetscErrorCode ierr; 1617 const PetscInt *r,*ic; 1618 PetscInt i,mbs=a->mbs,bs=A->rmap->bs,bs2=a->bs2,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 1619 PetscInt *bi,*bj,*bdiag; 1620 1621 PetscInt row,nzi,nzi_bl,nzi_bu,*im,dtcount,nzi_al,nzi_au; 1622 PetscInt nlnk,*lnk; 1623 PetscBT lnkbt; 1624 PetscTruth row_identity,icol_identity,both_identity; 1625 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,*multiplier,*vtmp; 1626 const PetscInt *ics; 1627 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 1628 1629 PetscReal dt=info->dt; /* shift=info->shiftinblocks; */ 1630 PetscInt nnz_max; 1631 PetscTruth missing; 1632 PetscReal *vtmp_abs; 1633 MatScalar *v_work; 1634 PetscInt *v_pivots; 1635 1636 PetscFunctionBegin; 1637 /* ------- symbolic factorization, can be reused ---------*/ 1638 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1639 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1640 adiag=a->diag; 1641 1642 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1643 1644 /* bdiag is location of diagonal in factor */ 1645 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1646 1647 /* allocate row pointers bi */ 1648 ierr = PetscMalloc((2*mbs+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1649 1650 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 1651 dtcount = (PetscInt)info->dtcount; 1652 if (dtcount > mbs-1) dtcount = mbs-1; 1653 nnz_max = ai[mbs]+2*mbs*dtcount +2; 1654 /* printf("MatILUDTFactor_SeqBAIJ, bs %d, ai[mbs] %d, nnz_max %d, dtcount %d\n",bs,ai[mbs],nnz_max,dtcount); */ 1655 ierr = PetscMalloc(nnz_max*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1656 nnz_max = nnz_max*bs2; 1657 ierr = PetscMalloc(nnz_max*sizeof(MatScalar),&ba);CHKERRQ(ierr); 1658 1659 /* put together the new matrix */ 1660 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1661 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 1662 b = (Mat_SeqBAIJ*)(B)->data; 1663 b->free_a = PETSC_TRUE; 1664 b->free_ij = PETSC_TRUE; 1665 b->singlemalloc = PETSC_FALSE; 1666 b->a = ba; 1667 b->j = bj; 1668 b->i = bi; 1669 b->diag = bdiag; 1670 b->ilen = 0; 1671 b->imax = 0; 1672 b->row = isrow; 1673 b->col = iscol; 1674 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1675 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1676 b->icol = isicol; 1677 ierr = PetscMalloc((bs*(mbs+1))*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1678 1679 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1680 b->maxnz = nnz_max/bs2; 1681 1682 (B)->factor = MAT_FACTOR_ILUDT; 1683 (B)->info.factor_mallocs = 0; 1684 (B)->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)(ai[mbs]*bs2)); 1685 CHKMEMQ; 1686 /* ------- end of symbolic factorization ---------*/ 1687 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1688 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1689 ics = ic; 1690 1691 /* linked list for storing column indices of the active row */ 1692 nlnk = mbs + 1; 1693 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1694 1695 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 1696 ierr = PetscMalloc((2*mbs+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 1697 jtmp = im + mbs; 1698 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 1699 ierr = PetscMalloc((2*mbs*bs2+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1700 vtmp = rtmp + bs2*mbs; 1701 ierr = PetscMalloc((mbs+1)*sizeof(PetscReal),&vtmp_abs);CHKERRQ(ierr); 1702 1703 ierr = PetscMalloc(bs*sizeof(PetscInt) + (bs+bs2)*sizeof(MatScalar),&v_work);CHKERRQ(ierr); 1704 multiplier = v_work + bs; 1705 v_pivots = (PetscInt*)(multiplier + bs2); 1706 1707 bi[0] = 0; 1708 bdiag[0] = (nnz_max/bs2)-1; /* location of diagonal in factor B */ 1709 bi[2*mbs+1] = bdiag[0]+1; /* endof bj and ba array */ 1710 for (i=0; i<mbs; i++) { 1711 /* copy initial fill into linked list */ 1712 nzi = 0; /* nonzeros for active row i */ 1713 nzi = ai[r[i]+1] - ai[r[i]]; 1714 if (!nzi) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1715 nzi_al = adiag[r[i]] - ai[r[i]]; 1716 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 1717 /* printf("row %d, nzi_al/au %d %d\n",i,nzi_al,nzi_au); */ 1718 1719 /* load in initial unfactored row */ 1720 ajtmp = aj + ai[r[i]]; 1721 ierr = PetscLLAddPerm(nzi,ajtmp,ic,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1722 ierr = PetscMemzero(rtmp,(mbs*bs2+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1723 aatmp = a->a + bs2*ai[r[i]]; 1724 for (j=0; j<nzi; j++) { 1725 ierr = PetscMemcpy(rtmp+bs2*ic[ajtmp[j]],aatmp+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1726 } 1727 1728 /* add pivot rows into linked list */ 1729 row = lnk[mbs]; 1730 while (row < i) { 1731 nzi_bl = bi[row+1] - bi[row] + 1; 1732 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 1733 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 1734 nzi += nlnk; 1735 row = lnk[row]; 1736 } 1737 1738 /* copy data from lnk into jtmp, then initialize lnk */ 1739 ierr = PetscLLClean(mbs,mbs,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 1740 1741 /* numerical factorization */ 1742 bjtmp = jtmp; 1743 row = *bjtmp++; /* 1st pivot row */ 1744 1745 while (row < i) { 1746 pc = rtmp + bs2*row; 1747 pv = ba + bs2*bdiag[row]; /* inv(diag) of the pivot row */ 1748 Kernel_A_gets_A_times_B(bs,pc,pv,multiplier); /* pc= multiplier = pc*inv(diag[row]) */ 1749 ierr = BlockAbs_private(1,bs2,pc,vtmp_abs);CHKERRQ(ierr); 1750 if (vtmp_abs[0] > dt){ /* apply tolerance dropping rule */ 1751 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 1752 pv = ba + bs2*(bdiag[row+1] + 1); 1753 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 1754 for (j=0; j<nz; j++){ 1755 Kernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); 1756 } 1757 /* ierr = PetscLogFlops(bslog*(nz+1.0)-bs);CHKERRQ(ierr); */ 1758 } 1759 row = *bjtmp++; 1760 } 1761 1762 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 1763 nzi_bl = 0; j = 0; 1764 while (jtmp[j] < i){ /* L-part. Note: jtmp is sorted */ 1765 ierr = PetscMemcpy(vtmp+bs2*j,rtmp+bs2*jtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 1766 nzi_bl++; j++; 1767 } 1768 nzi_bu = nzi - nzi_bl -1; 1769 /* printf("nzi %d, nzi_bl %d, nzi_bu %d\n",nzi,nzi_bl,nzi_bu); */ 1770 1771 while (j < nzi){ /* U-part */ 1772 ierr = PetscMemcpy(vtmp+bs2*j,rtmp+bs2*jtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 1773 /* 1774 printf(" col %d: ",jtmp[j]); 1775 for (j1=0; j1<bs2; j1++) printf(" %g",*(vtmp+bs2*j+j1)); 1776 printf(" \n"); 1777 */ 1778 j++; 1779 } 1780 1781 ierr = BlockAbs_private(nzi,bs2,vtmp,vtmp_abs);CHKERRQ(ierr); 1782 /* 1783 printf(" row %d, nzi %d, vtmp_abs\n",i,nzi); 1784 for (j1=0; j1<nzi; j1++) printf(" (%d %g),",jtmp[j1],vtmp_abs[j1]); 1785 printf(" \n"); 1786 */ 1787 bjtmp = bj + bi[i]; 1788 batmp = ba + bs2*bi[i]; 1789 /* apply level dropping rule to L part */ 1790 ncut = nzi_al + dtcount; 1791 if (ncut < nzi_bl){ 1792 ierr = PetscSortSplitReal(ncut,nzi_bl,vtmp_abs,jtmp);CHKERRQ(ierr); 1793 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 1794 } else { 1795 ncut = nzi_bl; 1796 } 1797 for (j=0; j<ncut; j++){ 1798 bjtmp[j] = jtmp[j]; 1799 ierr = PetscMemcpy(batmp+bs2*j,rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 1800 /* 1801 printf(" col %d: ",bjtmp[j]); 1802 for (j1=0; j1<bs2; j1++) printf(" %g,",*(batmp+bs2*j+j1)); 1803 printf("\n"); 1804 */ 1805 } 1806 bi[i+1] = bi[i] + ncut; 1807 nzi = ncut + 1; 1808 1809 /* apply level dropping rule to U part */ 1810 ncut = nzi_au + dtcount; 1811 if (ncut < nzi_bu){ 1812 ierr = PetscSortSplitReal(ncut,nzi_bu,vtmp_abs+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 1813 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 1814 } else { 1815 ncut = nzi_bu; 1816 } 1817 nzi += ncut; 1818 1819 /* mark bdiagonal */ 1820 bdiag[i+1] = bdiag[i] - (ncut + 1); 1821 bi[2*mbs - i] = bi[2*mbs - i +1] - (ncut + 1); 1822 1823 bjtmp = bj + bdiag[i]; 1824 batmp = ba + bs2*bdiag[i]; 1825 ierr = PetscMemcpy(batmp,rtmp+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1826 *bjtmp = i; 1827 /* 1828 printf(" diag %d: ",*bjtmp); 1829 for (j=0; j<bs2; j++){ 1830 printf(" %g,",batmp[j]); 1831 } 1832 printf("\n"); 1833 */ 1834 bjtmp = bj + bdiag[i+1]+1; 1835 batmp = ba + (bdiag[i+1]+1)*bs2; 1836 1837 for (k=0; k<ncut; k++){ 1838 bjtmp[k] = jtmp[nzi_bl+1+k]; 1839 ierr = PetscMemcpy(batmp+bs2*k,rtmp+bs2*bjtmp[k],bs2*sizeof(MatScalar));CHKERRQ(ierr); 1840 /* 1841 printf(" col %d:",bjtmp[k]); 1842 for (j1=0; j1<bs2; j1++) printf(" %g,",*(batmp+bs2*k+j1)); 1843 printf("\n"); 1844 */ 1845 } 1846 1847 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 1848 1849 /* invert diagonal block for simplier triangular solves - add shift??? */ 1850 batmp = ba + bs2*bdiag[i]; 1851 ierr = Kernel_A_gets_inverse_A(bs,batmp,v_pivots,v_work);CHKERRQ(ierr); 1852 } /* for (i=0; i<mbs; i++) */ 1853 1854 /* printf("end of L %d, beginning of U %d\n",bi[mbs],bdiag[mbs]); */ 1855 if (bi[mbs] >= bdiag[mbs]) SETERRQ2(PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[mbs],bdiag[mbs]); 1856 1857 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1858 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1859 1860 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1861 1862 ierr = PetscFree(im);CHKERRQ(ierr); 1863 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1864 1865 ierr = PetscLogFlops(bs2*B->cmap->n);CHKERRQ(ierr); 1866 b->maxnz = b->nz = bi[mbs] + bdiag[0] - bdiag[mbs]; 1867 1868 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1869 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 1870 both_identity = (PetscTruth) (row_identity && icol_identity); 1871 if (row_identity && icol_identity) { 1872 B->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct; 1873 } else { 1874 B->ops->solve = MatSolve_SeqBAIJ_N_newdatastruct; 1875 } 1876 1877 B->ops->solveadd = 0; 1878 B->ops->solvetranspose = 0; 1879 B->ops->solvetransposeadd = 0; 1880 B->ops->matsolve = 0; 1881 B->assembled = PETSC_TRUE; 1882 B->preallocated = PETSC_TRUE; 1883 PetscFunctionReturn(0); 1884 } 1885