1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for BAIJ format. 5 */ 6 #include "../src/mat/impls/baij/seq/baij.h" 7 #include "../src/mat/blockinvert.h" 8 9 /* ------------------------------------------------------------*/ 10 /* 11 Version for when blocks are 2 by 2 12 */ 13 /* MatLUFactorNumeric_SeqBAIJ_2_newdatastruct - 14 copied from MatLUFactorNumeric_SeqBAIJ_N_newdatastruct() and re-implemented 15 Kernel_A_gets_A_times_B() 16 Kernel_A_gets_A_minus_B_times_C() 17 Kernel_A_gets_inverse_A() 18 */ 19 #undef __FUNCT__ 20 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_newdatastruct" 21 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_newdatastruct(Mat B,Mat A,const MatFactorInfo *info) 22 { 23 Mat C=B; 24 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ *)C->data; 25 IS isrow = b->row,isicol = b->icol; 26 PetscErrorCode ierr; 27 const PetscInt *r,*ic,*ics; 28 PetscInt i,j,k,n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 29 PetscInt *ajtmp,*bjtmp,nz,nzL,row,*bdiag=b->diag,*pj; 30 MatScalar *rtmp,*pc,*mwork,*v,*pv,*aa=a->a; 31 PetscInt bs=A->rmap->bs,bs2 = a->bs2,flg; 32 MatScalar *x,m1,m2,m3,m4; 33 PetscReal shift = info->shiftinblocks; 34 35 PetscFunctionBegin; 36 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 37 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 38 39 /* generate work space needed by LU factorization */ 40 ierr = PetscMalloc((bs2*n+bs2+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 41 mwork = rtmp + bs2*n; 42 ierr = PetscMemzero(rtmp,(bs2*n+1)*sizeof(MatScalar));CHKERRQ(ierr); 43 ics = ic; 44 45 for (i=0; i<n; i++){ 46 /* zero rtmp */ 47 /* L part */ 48 nz = bi[i+1] - bi[i]; 49 bjtmp = bj + bi[i]; 50 for (j=0; j<nz; j++){ 51 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 52 } 53 54 /* U part */ 55 nz = bi[2*n-i+1] - bi[2*n-i]; 56 bjtmp = bj + bi[2*n-i]; 57 for (j=0; j<nz; j++){ 58 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 59 } 60 61 /* load in initial (unfactored row) */ 62 nz = ai[r[i]+1] - ai[r[i]]; 63 ajtmp = aj + ai[r[i]]; 64 v = aa + bs2*ai[r[i]]; 65 for (j=0; j<nz; j++) { 66 ierr = PetscMemcpy(rtmp+bs2*ic[ajtmp[j]],v+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 67 } 68 69 /* elimination */ 70 bjtmp = bj + bi[i]; 71 row = *bjtmp++; 72 nzL = bi[i+1] - bi[i]; 73 k = 0; 74 while (k < nzL) { 75 pc = rtmp + bs2*row; 76 for (flg=0,j=0; j<bs2; j++) { if (pc[j]!=0.0) { flg = 1; break; }} 77 if (flg) { 78 pv = b->a + bs2*bdiag[row]; 79 /* Kernel_A_gets_A_times_B(bs,pc,pv,mwork); */ 80 /* ---Kernel_A_gets_A_times_B_2(): *pc = *pc * (*pv); ------- */ 81 ierr = PetscMemcpy(mwork,pc,bs2*sizeof(MatScalar));CHKERRQ(ierr); 82 pc[0] = m1 = mwork[0]*pv[0] + mwork[2]*pv[1]; 83 pc[1] = m2 = mwork[1]*pv[0] + mwork[3]*pv[1]; 84 pc[2] = m3 = mwork[0]*pv[2] + mwork[2]*pv[3]; 85 pc[3] = m4 = mwork[1]*pv[2] + mwork[3]*pv[3]; 86 /* ------------ endof Kernel_A_gets_A_times_B_2() ----------- */ 87 88 pj = b->j + bi[2*n-row]; /* begining of U(row,:) */ 89 pv = b->a + bs2*bi[2*n-row]; 90 nz = bi[2*n-row+1] - bi[2*n-row] - 1; /* num of entries inU(row,:), excluding diag */ 91 for (j=0; j<nz; j++) { 92 /* Kernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */ 93 /* -- Kernel_A_gets_A_minus_B_times_C_2(): rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */ 94 x = rtmp + 4*pj[j]; 95 x[0] -= m1*pv[0] + m3*pv[1]; 96 x[1] -= m2*pv[0] + m4*pv[1]; 97 x[2] -= m1*pv[2] + m3*pv[3]; 98 x[3] -= m2*pv[2] + m4*pv[3]; 99 pv += 4; 100 /*------------ endof Kernel_A_gets_A_minus_B_times_C_2() -- */ 101 } 102 ierr = PetscLogFlops(16*nz+12);CHKERRQ(ierr); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 103 } 104 row = *bjtmp++; k++; 105 } 106 107 /* finished row so stick it into b->a */ 108 /* L part */ 109 pv = b->a + bs2*bi[i] ; 110 pj = b->j + bi[i] ; 111 nz = bi[i+1] - bi[i]; 112 for (j=0; j<nz; j++) { 113 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 114 } 115 116 /* Mark diagonal and invert diagonal for simplier triangular solves */ 117 pv = b->a + bs2*bdiag[i]; 118 pj = b->j + bdiag[i]; 119 ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); 120 // ierr = Kernel_A_gets_inverse_A(bs,pv,v_pivots,v_work);CHKERRQ(ierr); 121 ierr = Kernel_A_gets_inverse_A_2(pv,shift);CHKERRQ(ierr); 122 123 /* U part */ 124 pv = b->a + bs2*bi[2*n-i]; 125 pj = b->j + bi[2*n-i]; 126 nz = bi[2*n-i+1] - bi[2*n-i] - 1; 127 for (j=0; j<nz; j++){ 128 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 129 } 130 } 131 132 ierr = PetscFree(rtmp);CHKERRQ(ierr); 133 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 134 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 135 136 C->assembled = PETSC_TRUE; 137 ierr = PetscLogFlops(1.3333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 138 PetscFunctionReturn(0); 139 } 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2" 143 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat B,Mat A,const MatFactorInfo *info) 144 { 145 Mat C = B; 146 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 147 IS isrow = b->row,isicol = b->icol; 148 PetscErrorCode ierr; 149 const PetscInt *r,*ic; 150 PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; 151 PetscInt *ajtmpold,*ajtmp,nz,row; 152 PetscInt *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj; 153 MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4; 154 MatScalar p1,p2,p3,p4; 155 MatScalar *ba = b->a,*aa = a->a; 156 PetscReal shift = info->shiftinblocks; 157 158 PetscFunctionBegin; 159 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 160 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 161 ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 162 163 for (i=0; i<n; i++) { 164 nz = bi[i+1] - bi[i]; 165 ajtmp = bj + bi[i]; 166 for (j=0; j<nz; j++) { 167 x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0; 168 } 169 /* load in initial (unfactored row) */ 170 idx = r[i]; 171 nz = ai[idx+1] - ai[idx]; 172 ajtmpold = aj + ai[idx]; 173 v = aa + 4*ai[idx]; 174 for (j=0; j<nz; j++) { 175 x = rtmp+4*ic[ajtmpold[j]]; 176 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 177 v += 4; 178 } 179 row = *ajtmp++; 180 while (row < i) { 181 pc = rtmp + 4*row; 182 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 183 if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 184 pv = ba + 4*diag_offset[row]; 185 pj = bj + diag_offset[row] + 1; 186 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 187 pc[0] = m1 = p1*x1 + p3*x2; 188 pc[1] = m2 = p2*x1 + p4*x2; 189 pc[2] = m3 = p1*x3 + p3*x4; 190 pc[3] = m4 = p2*x3 + p4*x4; 191 nz = bi[row+1] - diag_offset[row] - 1; 192 pv += 4; 193 for (j=0; j<nz; j++) { 194 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 195 x = rtmp + 4*pj[j]; 196 x[0] -= m1*x1 + m3*x2; 197 x[1] -= m2*x1 + m4*x2; 198 x[2] -= m1*x3 + m3*x4; 199 x[3] -= m2*x3 + m4*x4; 200 pv += 4; 201 } 202 ierr = PetscLogFlops(16.0*nz+12.0);CHKERRQ(ierr); 203 } 204 row = *ajtmp++; 205 } 206 /* finished row so stick it into b->a */ 207 pv = ba + 4*bi[i]; 208 pj = bj + bi[i]; 209 nz = bi[i+1] - bi[i]; 210 for (j=0; j<nz; j++) { 211 x = rtmp+4*pj[j]; 212 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 213 pv += 4; 214 } 215 /* invert diagonal block */ 216 w = ba + 4*diag_offset[i]; 217 ierr = Kernel_A_gets_inverse_A_2(w,shift);CHKERRQ(ierr); 218 } 219 220 ierr = PetscFree(rtmp);CHKERRQ(ierr); 221 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 222 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 223 C->ops->solve = MatSolve_SeqBAIJ_2; 224 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2; 225 C->assembled = PETSC_TRUE; 226 ierr = PetscLogFlops(1.3333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 227 PetscFunctionReturn(0); 228 } 229 /* 230 Version for when blocks are 2 by 2 Using natural ordering 231 */ 232 #undef __FUNCT__ 233 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering" 234 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info) 235 { 236 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 237 PetscErrorCode ierr; 238 PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; 239 PetscInt *ajtmpold,*ajtmp,nz,row; 240 PetscInt *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; 241 MatScalar *pv,*v,*rtmp,*pc,*w,*x; 242 MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4; 243 MatScalar *ba = b->a,*aa = a->a; 244 PetscReal shift = info->shiftinblocks; 245 246 PetscFunctionBegin; 247 ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 248 for (i=0; i<n; i++) { 249 nz = bi[i+1] - bi[i]; 250 ajtmp = bj + bi[i]; 251 for (j=0; j<nz; j++) { 252 x = rtmp+4*ajtmp[j]; 253 x[0] = x[1] = x[2] = x[3] = 0.0; 254 } 255 /* load in initial (unfactored row) */ 256 nz = ai[i+1] - ai[i]; 257 ajtmpold = aj + ai[i]; 258 v = aa + 4*ai[i]; 259 for (j=0; j<nz; j++) { 260 x = rtmp+4*ajtmpold[j]; 261 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 262 v += 4; 263 } 264 row = *ajtmp++; 265 while (row < i) { 266 pc = rtmp + 4*row; 267 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 268 if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 269 pv = ba + 4*diag_offset[row]; 270 pj = bj + diag_offset[row] + 1; 271 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 272 pc[0] = m1 = p1*x1 + p3*x2; 273 pc[1] = m2 = p2*x1 + p4*x2; 274 pc[2] = m3 = p1*x3 + p3*x4; 275 pc[3] = m4 = p2*x3 + p4*x4; 276 nz = bi[row+1] - diag_offset[row] - 1; 277 pv += 4; 278 for (j=0; j<nz; j++) { 279 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 280 x = rtmp + 4*pj[j]; 281 x[0] -= m1*x1 + m3*x2; 282 x[1] -= m2*x1 + m4*x2; 283 x[2] -= m1*x3 + m3*x4; 284 x[3] -= m2*x3 + m4*x4; 285 pv += 4; 286 } 287 ierr = PetscLogFlops(16.0*nz+12.0);CHKERRQ(ierr); 288 } 289 row = *ajtmp++; 290 } 291 /* finished row so stick it into b->a */ 292 pv = ba + 4*bi[i]; 293 pj = bj + bi[i]; 294 nz = bi[i+1] - bi[i]; 295 for (j=0; j<nz; j++) { 296 x = rtmp+4*pj[j]; 297 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 298 /* 299 printf(" col %d:",pj[j]); 300 PetscInt j1; 301 for (j1=0; j1<4; j1++) printf(" %g,",*(pv+j1)); 302 printf("\n"); 303 */ 304 pv += 4; 305 } 306 /* invert diagonal block */ 307 w = ba + 4*diag_offset[i]; 308 /* 309 printf(" \n%d -th: diag: ",i); 310 for (j=0; j<4; j++){ 311 printf(" %g,",w[j]); 312 } 313 printf("\n----------------------------\n"); 314 */ 315 ierr = Kernel_A_gets_inverse_A_2(w,shift);CHKERRQ(ierr); 316 } 317 318 ierr = PetscFree(rtmp);CHKERRQ(ierr); 319 C->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 320 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering; 321 C->assembled = PETSC_TRUE; 322 ierr = PetscLogFlops(1.3333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 323 PetscFunctionReturn(0); 324 } 325 326 /* ----------------------------------------------------------- */ 327 /* 328 Version for when blocks are 1 by 1. 329 */ 330 #undef __FUNCT__ 331 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1" 332 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat C,Mat A,const MatFactorInfo *info) 333 { 334 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 335 IS isrow = b->row,isicol = b->icol; 336 PetscErrorCode ierr; 337 const PetscInt *r,*ic; 338 PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; 339 PetscInt *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j; 340 PetscInt *diag_offset = b->diag,diag,*pj; 341 MatScalar *pv,*v,*rtmp,multiplier,*pc; 342 MatScalar *ba = b->a,*aa = a->a; 343 PetscTruth row_identity, col_identity; 344 345 PetscFunctionBegin; 346 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 347 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 348 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 349 350 for (i=0; i<n; i++) { 351 nz = bi[i+1] - bi[i]; 352 ajtmp = bj + bi[i]; 353 for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0; 354 355 /* load in initial (unfactored row) */ 356 nz = ai[r[i]+1] - ai[r[i]]; 357 ajtmpold = aj + ai[r[i]]; 358 v = aa + ai[r[i]]; 359 for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j]; 360 361 row = *ajtmp++; 362 while (row < i) { 363 pc = rtmp + row; 364 if (*pc != 0.0) { 365 pv = ba + diag_offset[row]; 366 pj = bj + diag_offset[row] + 1; 367 multiplier = *pc * *pv++; 368 *pc = multiplier; 369 nz = bi[row+1] - diag_offset[row] - 1; 370 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 371 ierr = PetscLogFlops(1.0+2.0*nz);CHKERRQ(ierr); 372 } 373 row = *ajtmp++; 374 } 375 /* finished row so stick it into b->a */ 376 pv = ba + bi[i]; 377 pj = bj + bi[i]; 378 nz = bi[i+1] - bi[i]; 379 for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];} 380 diag = diag_offset[i] - bi[i]; 381 /* check pivot entry for current row */ 382 if (pv[diag] == 0.0) { 383 SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot: row in original ordering %D in permuted ordering %D",r[i],i); 384 } 385 pv[diag] = 1.0/pv[diag]; 386 } 387 388 ierr = PetscFree(rtmp);CHKERRQ(ierr); 389 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 390 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 391 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 392 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 393 if (row_identity && col_identity) { 394 C->ops->solve = MatSolve_SeqBAIJ_1_NaturalOrdering; 395 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering; 396 } else { 397 C->ops->solve = MatSolve_SeqBAIJ_1; 398 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1; 399 } 400 C->assembled = PETSC_TRUE; 401 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 402 PetscFunctionReturn(0); 403 } 404 405 EXTERN_C_BEGIN 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatGetFactor_seqbaij_petsc" 408 PetscErrorCode MatGetFactor_seqbaij_petsc(Mat A,MatFactorType ftype,Mat *B) 409 { 410 PetscInt n = A->rmap->n; 411 PetscErrorCode ierr; 412 413 PetscFunctionBegin; 414 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 415 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 416 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 417 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 418 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqBAIJ; 419 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqBAIJ; 420 (*B)->ops->iludtfactor = MatILUDTFactor_SeqBAIJ; 421 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 422 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 423 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 424 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqBAIJ; 425 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqBAIJ; 426 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 427 (*B)->factor = ftype; 428 PetscFunctionReturn(0); 429 } 430 EXTERN_C_END 431 432 EXTERN_C_BEGIN 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 435 PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 436 { 437 PetscFunctionBegin; 438 *flg = PETSC_TRUE; 439 PetscFunctionReturn(0); 440 } 441 EXTERN_C_END 442 443 /* ----------------------------------------------------------- */ 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatLUFactor_SeqBAIJ" 446 PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 447 { 448 PetscErrorCode ierr; 449 Mat C; 450 451 PetscFunctionBegin; 452 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 453 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 454 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 455 A->ops->solve = C->ops->solve; 456 A->ops->solvetranspose = C->ops->solvetranspose; 457 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 458 ierr = PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 462 #include "../src/mat/impls/sbaij/seq/sbaij.h" 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N" 465 PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat C,Mat A,const MatFactorInfo *info) 466 { 467 PetscErrorCode ierr; 468 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 469 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 470 IS ip=b->row; 471 const PetscInt *rip; 472 PetscInt i,j,mbs=a->mbs,bs=A->rmap->bs,*bi=b->i,*bj=b->j,*bcol; 473 PetscInt *ai=a->i,*aj=a->j; 474 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 475 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 476 PetscReal zeropivot,rs,shiftnz; 477 PetscReal shiftpd; 478 ChShift_Ctx sctx; 479 PetscInt newshift; 480 481 PetscFunctionBegin; 482 if (bs > 1) { 483 if (!a->sbaijMat){ 484 ierr = MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);CHKERRQ(ierr); 485 } 486 ierr = (a->sbaijMat)->ops->choleskyfactornumeric(C,a->sbaijMat,info);CHKERRQ(ierr); 487 ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 488 a->sbaijMat = PETSC_NULL; 489 PetscFunctionReturn(0); 490 } 491 492 /* initialization */ 493 shiftnz = info->shiftnz; 494 shiftpd = info->shiftpd; 495 zeropivot = info->zeropivot; 496 497 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 498 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 499 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 500 jl = il + mbs; 501 rtmp = (MatScalar*)(jl + mbs); 502 503 sctx.shift_amount = 0; 504 sctx.nshift = 0; 505 do { 506 sctx.chshift = PETSC_FALSE; 507 for (i=0; i<mbs; i++) { 508 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 509 } 510 511 for (k = 0; k<mbs; k++){ 512 bval = ba + bi[k]; 513 /* initialize k-th row by the perm[k]-th row of A */ 514 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 515 for (j = jmin; j < jmax; j++){ 516 col = rip[aj[j]]; 517 if (col >= k){ /* only take upper triangular entry */ 518 rtmp[col] = aa[j]; 519 *bval++ = 0.0; /* for in-place factorization */ 520 } 521 } 522 523 /* shift the diagonal of the matrix */ 524 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 525 526 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 527 dk = rtmp[k]; 528 i = jl[k]; /* first row to be added to k_th row */ 529 530 while (i < k){ 531 nexti = jl[i]; /* next row to be added to k_th row */ 532 533 /* compute multiplier, update diag(k) and U(i,k) */ 534 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 535 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 536 dk += uikdi*ba[ili]; 537 ba[ili] = uikdi; /* -U(i,k) */ 538 539 /* add multiple of row i to k-th row */ 540 jmin = ili + 1; jmax = bi[i+1]; 541 if (jmin < jmax){ 542 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 543 /* update il and jl for row i */ 544 il[i] = jmin; 545 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 546 } 547 i = nexti; 548 } 549 550 /* shift the diagonals when zero pivot is detected */ 551 /* compute rs=sum of abs(off-diagonal) */ 552 rs = 0.0; 553 jmin = bi[k]+1; 554 nz = bi[k+1] - jmin; 555 if (nz){ 556 bcol = bj + jmin; 557 while (nz--){ 558 rs += PetscAbsScalar(rtmp[*bcol]); 559 bcol++; 560 } 561 } 562 563 sctx.rs = rs; 564 sctx.pv = dk; 565 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 566 if (newshift == 1) break; 567 568 /* copy data into U(k,:) */ 569 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 570 jmin = bi[k]+1; jmax = bi[k+1]; 571 if (jmin < jmax) { 572 for (j=jmin; j<jmax; j++){ 573 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 574 } 575 /* add the k-th row into il and jl */ 576 il[k] = jmin; 577 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 578 } 579 } 580 } while (sctx.chshift); 581 ierr = PetscFree(il);CHKERRQ(ierr); 582 583 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 584 C->assembled = PETSC_TRUE; 585 C->preallocated = PETSC_TRUE; 586 ierr = PetscLogFlops(C->rmap->N);CHKERRQ(ierr); 587 if (sctx.nshift){ 588 if (shiftpd) { 589 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 590 } else if (shiftnz) { 591 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 592 } 593 } 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering" 599 PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info) 600 { 601 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 602 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 603 PetscErrorCode ierr; 604 PetscInt i,j,am=a->mbs; 605 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 606 PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz; 607 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 608 PetscReal zeropivot,rs,shiftnz; 609 PetscReal shiftpd; 610 ChShift_Ctx sctx; 611 PetscInt newshift; 612 613 PetscFunctionBegin; 614 /* initialization */ 615 shiftnz = info->shiftnz; 616 shiftpd = info->shiftpd; 617 zeropivot = info->zeropivot; 618 619 nz = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar); 620 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 621 jl = il + am; 622 rtmp = (MatScalar*)(jl + am); 623 624 sctx.shift_amount = 0; 625 sctx.nshift = 0; 626 do { 627 sctx.chshift = PETSC_FALSE; 628 for (i=0; i<am; i++) { 629 rtmp[i] = 0.0; jl[i] = am; il[0] = 0; 630 } 631 632 for (k = 0; k<am; k++){ 633 /* initialize k-th row with elements nonzero in row perm(k) of A */ 634 nz = ai[k+1] - ai[k]; 635 acol = aj + ai[k]; 636 aval = aa + ai[k]; 637 bval = ba + bi[k]; 638 while (nz -- ){ 639 if (*acol < k) { /* skip lower triangular entries */ 640 acol++; aval++; 641 } else { 642 rtmp[*acol++] = *aval++; 643 *bval++ = 0.0; /* for in-place factorization */ 644 } 645 } 646 647 /* shift the diagonal of the matrix */ 648 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 649 650 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 651 dk = rtmp[k]; 652 i = jl[k]; /* first row to be added to k_th row */ 653 654 while (i < k){ 655 nexti = jl[i]; /* next row to be added to k_th row */ 656 /* compute multiplier, update D(k) and U(i,k) */ 657 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 658 uikdi = - ba[ili]*ba[bi[i]]; 659 dk += uikdi*ba[ili]; 660 ba[ili] = uikdi; /* -U(i,k) */ 661 662 /* add multiple of row i to k-th row ... */ 663 jmin = ili + 1; 664 nz = bi[i+1] - jmin; 665 if (nz > 0){ 666 bcol = bj + jmin; 667 bval = ba + jmin; 668 while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 669 /* update il and jl for i-th row */ 670 il[i] = jmin; 671 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 672 } 673 i = nexti; 674 } 675 676 /* shift the diagonals when zero pivot is detected */ 677 /* compute rs=sum of abs(off-diagonal) */ 678 rs = 0.0; 679 jmin = bi[k]+1; 680 nz = bi[k+1] - jmin; 681 if (nz){ 682 bcol = bj + jmin; 683 while (nz--){ 684 rs += PetscAbsScalar(rtmp[*bcol]); 685 bcol++; 686 } 687 } 688 689 sctx.rs = rs; 690 sctx.pv = dk; 691 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 692 if (newshift == 1) break; /* sctx.shift_amount is updated */ 693 694 /* copy data into U(k,:) */ 695 ba[bi[k]] = 1.0/dk; 696 jmin = bi[k]+1; 697 nz = bi[k+1] - jmin; 698 if (nz){ 699 bcol = bj + jmin; 700 bval = ba + jmin; 701 while (nz--){ 702 *bval++ = rtmp[*bcol]; 703 rtmp[*bcol++] = 0.0; 704 } 705 /* add k-th row into il and jl */ 706 il[k] = jmin; 707 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 708 } 709 } 710 } while (sctx.chshift); 711 ierr = PetscFree(il);CHKERRQ(ierr); 712 713 C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 714 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 715 C->assembled = PETSC_TRUE; 716 C->preallocated = PETSC_TRUE; 717 ierr = PetscLogFlops(C->rmap->N);CHKERRQ(ierr); 718 if (sctx.nshift){ 719 if (shiftnz) { 720 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 721 } else if (shiftpd) { 722 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 723 } 724 } 725 PetscFunctionReturn(0); 726 } 727 728 #include "petscbt.h" 729 #include "../src/mat/utils/freespace.h" 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatICCFactorSymbolic_SeqBAIJ" 732 PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 733 { 734 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 735 Mat_SeqSBAIJ *b; 736 Mat B; 737 PetscErrorCode ierr; 738 PetscTruth perm_identity; 739 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=a->mbs,bs=A->rmap->bs,*ui; 740 const PetscInt *rip; 741 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 742 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 743 PetscReal fill=info->fill,levels=info->levels; 744 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 745 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 746 PetscBT lnkbt; 747 748 PetscFunctionBegin; 749 if (bs > 1){ 750 if (!a->sbaijMat){ 751 ierr = MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);CHKERRQ(ierr); 752 } 753 (fact)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */ 754 ierr = MatICCFactorSymbolic(fact,a->sbaijMat,perm,info);CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757 758 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 759 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 760 761 /* special case that simply copies fill pattern */ 762 if (!levels && perm_identity) { 763 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 764 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 765 for (i=0; i<am; i++) { 766 ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */ 767 } 768 B = fact; 769 ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr); 770 771 772 b = (Mat_SeqSBAIJ*)B->data; 773 uj = b->j; 774 for (i=0; i<am; i++) { 775 aj = a->j + a->diag[i]; 776 for (j=0; j<ui[i]; j++){ 777 *uj++ = *aj++; 778 } 779 b->ilen[i] = ui[i]; 780 } 781 ierr = PetscFree(ui);CHKERRQ(ierr); 782 B->factor = MAT_FACTOR_NONE; 783 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 784 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 785 B->factor = MAT_FACTOR_ICC; 786 787 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 788 PetscFunctionReturn(0); 789 } 790 791 /* initialization */ 792 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 793 ui[0] = 0; 794 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 795 796 /* jl: linked list for storing indices of the pivot rows 797 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 798 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 799 il = jl + am; 800 uj_ptr = (PetscInt**)(il + am); 801 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 802 for (i=0; i<am; i++){ 803 jl[i] = am; il[i] = 0; 804 } 805 806 /* create and initialize a linked list for storing column indices of the active row k */ 807 nlnk = am + 1; 808 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 809 810 /* initial FreeSpace size is fill*(ai[am]+1) */ 811 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 812 current_space = free_space; 813 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 814 current_space_lvl = free_space_lvl; 815 816 for (k=0; k<am; k++){ /* for each active row k */ 817 /* initialize lnk by the column indices of row rip[k] of A */ 818 nzk = 0; 819 ncols = ai[rip[k]+1] - ai[rip[k]]; 820 ncols_upper = 0; 821 cols = cols_lvl + am; 822 for (j=0; j<ncols; j++){ 823 i = rip[*(aj + ai[rip[k]] + j)]; 824 if (i >= k){ /* only take upper triangular entry */ 825 cols[ncols_upper] = i; 826 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 827 ncols_upper++; 828 } 829 } 830 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 831 nzk += nlnk; 832 833 /* update lnk by computing fill-in for each pivot row to be merged in */ 834 prow = jl[k]; /* 1st pivot row */ 835 836 while (prow < k){ 837 nextprow = jl[prow]; 838 839 /* merge prow into k-th row */ 840 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 841 jmax = ui[prow+1]; 842 ncols = jmax-jmin; 843 i = jmin - ui[prow]; 844 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 845 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 846 ierr = PetscIncompleteLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 847 nzk += nlnk; 848 849 /* update il and jl for prow */ 850 if (jmin < jmax){ 851 il[prow] = jmin; 852 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 853 } 854 prow = nextprow; 855 } 856 857 /* if free space is not available, make more free space */ 858 if (current_space->local_remaining<nzk) { 859 i = am - k + 1; /* num of unfactored rows */ 860 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 861 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 862 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 863 reallocs++; 864 } 865 866 /* copy data into free_space and free_space_lvl, then initialize lnk */ 867 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 868 869 /* add the k-th row into il and jl */ 870 if (nzk-1 > 0){ 871 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 872 jl[k] = jl[i]; jl[i] = k; 873 il[k] = ui[k] + 1; 874 } 875 uj_ptr[k] = current_space->array; 876 uj_lvl_ptr[k] = current_space_lvl->array; 877 878 current_space->array += nzk; 879 current_space->local_used += nzk; 880 current_space->local_remaining -= nzk; 881 882 current_space_lvl->array += nzk; 883 current_space_lvl->local_used += nzk; 884 current_space_lvl->local_remaining -= nzk; 885 886 ui[k+1] = ui[k] + nzk; 887 } 888 889 #if defined(PETSC_USE_INFO) 890 if (ai[am] != 0) { 891 PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]); 892 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 893 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 894 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 895 } else { 896 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 897 } 898 #endif 899 900 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 901 ierr = PetscFree(jl);CHKERRQ(ierr); 902 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 903 904 /* destroy list of free space and other temporary array(s) */ 905 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 906 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 907 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 908 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 909 910 /* put together the new matrix in MATSEQSBAIJ format */ 911 B = fact; 912 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 913 914 b = (Mat_SeqSBAIJ*)B->data; 915 b->singlemalloc = PETSC_FALSE; 916 b->free_a = PETSC_TRUE; 917 b->free_ij = PETSC_TRUE; 918 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 919 b->j = uj; 920 b->i = ui; 921 b->diag = 0; 922 b->ilen = 0; 923 b->imax = 0; 924 b->row = perm; 925 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 926 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 927 b->icol = perm; 928 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 929 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 930 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 931 b->maxnz = b->nz = ui[am]; 932 933 B->info.factor_mallocs = reallocs; 934 B->info.fill_ratio_given = fill; 935 if (ai[am] != 0) { 936 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 937 } else { 938 B->info.fill_ratio_needed = 0.0; 939 } 940 if (perm_identity){ 941 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 942 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 943 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 944 } else { 945 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 946 } 947 PetscFunctionReturn(0); 948 } 949 950 #undef __FUNCT__ 951 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqBAIJ" 952 PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 953 { 954 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 955 Mat_SeqSBAIJ *b; 956 Mat B; 957 PetscErrorCode ierr; 958 PetscTruth perm_identity; 959 PetscReal fill = info->fill; 960 const PetscInt *rip; 961 PetscInt i,mbs=a->mbs,bs=A->rmap->bs,*ai=a->i,*aj=a->j,reallocs=0,prow; 962 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 963 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 964 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 965 PetscBT lnkbt; 966 967 PetscFunctionBegin; 968 if (bs > 1) { /* convert to seqsbaij */ 969 if (!a->sbaijMat){ 970 ierr = MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);CHKERRQ(ierr); 971 } 972 (fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */ 973 ierr = MatCholeskyFactorSymbolic(fact,a->sbaijMat,perm,info);CHKERRQ(ierr); 974 PetscFunctionReturn(0); 975 } 976 977 /* check whether perm is the identity mapping */ 978 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 979 if (!perm_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 980 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 981 982 /* initialization */ 983 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 984 ui[0] = 0; 985 986 /* jl: linked list for storing indices of the pivot rows 987 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 988 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 989 il = jl + mbs; 990 cols = il + mbs; 991 ui_ptr = (PetscInt**)(cols + mbs); 992 for (i=0; i<mbs; i++){ 993 jl[i] = mbs; il[i] = 0; 994 } 995 996 /* create and initialize a linked list for storing column indices of the active row k */ 997 nlnk = mbs + 1; 998 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 999 1000 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 1001 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 1002 current_space = free_space; 1003 1004 for (k=0; k<mbs; k++){ /* for each active row k */ 1005 /* initialize lnk by the column indices of row rip[k] of A */ 1006 nzk = 0; 1007 ncols = ai[rip[k]+1] - ai[rip[k]]; 1008 ncols_upper = 0; 1009 for (j=0; j<ncols; j++){ 1010 i = rip[*(aj + ai[rip[k]] + j)]; 1011 if (i >= k){ /* only take upper triangular entry */ 1012 cols[ncols_upper] = i; 1013 ncols_upper++; 1014 } 1015 } 1016 ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1017 nzk += nlnk; 1018 1019 /* update lnk by computing fill-in for each pivot row to be merged in */ 1020 prow = jl[k]; /* 1st pivot row */ 1021 1022 while (prow < k){ 1023 nextprow = jl[prow]; 1024 /* merge prow into k-th row */ 1025 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 1026 jmax = ui[prow+1]; 1027 ncols = jmax-jmin; 1028 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 1029 ierr = PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1030 nzk += nlnk; 1031 1032 /* update il and jl for prow */ 1033 if (jmin < jmax){ 1034 il[prow] = jmin; 1035 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1036 } 1037 prow = nextprow; 1038 } 1039 1040 /* if free space is not available, make more free space */ 1041 if (current_space->local_remaining<nzk) { 1042 i = mbs - k + 1; /* num of unfactored rows */ 1043 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1044 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1045 reallocs++; 1046 } 1047 1048 /* copy data into free space, then initialize lnk */ 1049 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1050 1051 /* add the k-th row into il and jl */ 1052 if (nzk-1 > 0){ 1053 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1054 jl[k] = jl[i]; jl[i] = k; 1055 il[k] = ui[k] + 1; 1056 } 1057 ui_ptr[k] = current_space->array; 1058 current_space->array += nzk; 1059 current_space->local_used += nzk; 1060 current_space->local_remaining -= nzk; 1061 1062 ui[k+1] = ui[k] + nzk; 1063 } 1064 1065 #if defined(PETSC_USE_INFO) 1066 if (ai[mbs] != 0) { 1067 PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 1068 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1069 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1070 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1071 } else { 1072 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1073 } 1074 #endif 1075 1076 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1077 ierr = PetscFree(jl);CHKERRQ(ierr); 1078 1079 /* destroy list of free space and other temporary array(s) */ 1080 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1081 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1082 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1083 1084 /* put together the new matrix in MATSEQSBAIJ format */ 1085 B = fact; 1086 ierr = MatSeqSBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1087 1088 b = (Mat_SeqSBAIJ*)B->data; 1089 b->singlemalloc = PETSC_FALSE; 1090 b->free_a = PETSC_TRUE; 1091 b->free_ij = PETSC_TRUE; 1092 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1093 b->j = uj; 1094 b->i = ui; 1095 b->diag = 0; 1096 b->ilen = 0; 1097 b->imax = 0; 1098 b->row = perm; 1099 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1100 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1101 b->icol = perm; 1102 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1103 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1104 ierr = PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1105 b->maxnz = b->nz = ui[mbs]; 1106 1107 B->info.factor_mallocs = reallocs; 1108 B->info.fill_ratio_given = fill; 1109 if (ai[mbs] != 0) { 1110 B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 1111 } else { 1112 B->info.fill_ratio_needed = 0.0; 1113 } 1114 if (perm_identity){ 1115 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 1116 } else { 1117 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 1118 } 1119 PetscFunctionReturn(0); 1120 } 1121 1122 /* --------------------------------------------------------- */ 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct" 1125 PetscErrorCode MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct(Mat A,Vec bb,Vec xx) 1126 { 1127 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1128 PetscErrorCode ierr; 1129 const PetscInt *ai=a->i,*aj=a->j,*vi; 1130 PetscInt i,k,n=a->mbs; 1131 PetscInt nz,bs=A->rmap->bs,bs2=a->bs2,*adiag=a->diag; 1132 MatScalar *aa=a->a,*v; 1133 PetscScalar *x,*b,*s,*t,*ls; 1134 1135 PetscFunctionBegin; 1136 /* printf("MatSolve_SeqBAIJ_NaturalOrdering_iludt..bs %d\n",bs); */ 1137 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1138 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1139 t = a->solve_work; 1140 1141 /* forward solve the lower triangular */ 1142 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* copy 1st block of b to t */ 1143 1144 for (i=1; i<n; i++) { 1145 v = aa + bs2*ai[i]; 1146 vi = aj + ai[i]; 1147 nz = ai[i+1] - ai[i]; 1148 s = t + bs*i; 1149 ierr = PetscMemcpy(s,b+bs*i,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* copy i_th block of b to t */ 1150 for(k=0;k<nz;k++){ 1151 Kernel_v_gets_v_minus_A_times_w(bs,s,v,t+bs*vi[k]); 1152 v += bs2; 1153 } 1154 } 1155 1156 /* backward solve the upper triangular */ 1157 ls = a->solve_work + A->cmap->n; 1158 for (i=n-1; i>=0; i--){ 1159 v = aa + bs2*ai[2*n-i]; 1160 vi = aj + ai[2*n-i]; 1161 nz = ai[2*n-i +1] - ai[2*n-i]-1; 1162 ierr = PetscMemcpy(ls,t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1163 for(k=0;k<nz;k++){ 1164 Kernel_v_gets_v_minus_A_times_w(bs,ls,v,t+bs*vi[k]); 1165 v += bs2; 1166 } 1167 Kernel_w_gets_A_times_v(bs,ls,aa+bs2*adiag[i],t+i*bs); /* *inv(diagonal[i]) */ 1168 ierr = PetscMemcpy(x+i*bs,t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1169 } 1170 1171 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1172 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1173 ierr = PetscLogFlops(2.0*(a->bs2)*(a->nz) - A->rmap->bs*A->cmap->n);CHKERRQ(ierr); 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatSolve_SeqBAIJ_N_newdatastruct" 1179 PetscErrorCode MatSolve_SeqBAIJ_N_newdatastruct(Mat A,Vec bb,Vec xx) 1180 { 1181 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1182 IS iscol=a->col,isrow=a->row; 1183 PetscErrorCode ierr; 1184 const PetscInt *r,*c,*rout,*cout,*ai=a->i,*aj=a->j,*vi; 1185 PetscInt i,m,n=a->mbs; 1186 PetscInt nz,bs=A->rmap->bs,bs2=a->bs2,k; 1187 MatScalar *aa=a->a,*v; 1188 PetscScalar *x,*b,*s,*t,*ls; 1189 1190 PetscFunctionBegin; 1191 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1192 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1193 t = a->solve_work; 1194 1195 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1196 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1197 1198 /* forward solve the lower triangular */ 1199 ierr = PetscMemcpy(t,b+bs*r[0],bs*sizeof(PetscScalar));CHKERRQ(ierr); 1200 for (i=1; i<n; i++) { 1201 v = aa + bs2*ai[i]; 1202 vi = aj + ai[i]; 1203 nz = ai[i+1] - ai[i]; 1204 s = t + bs*i; 1205 ierr = PetscMemcpy(s,b+bs*r[i],bs*sizeof(PetscScalar));CHKERRQ(ierr); 1206 for(m=0;m<nz;m++){ 1207 Kernel_v_gets_v_minus_A_times_w(bs,s,v,t+bs*vi[m]); 1208 v += bs2; 1209 } 1210 } 1211 1212 /* backward solve the upper triangular */ 1213 ls = a->solve_work + A->cmap->n; 1214 for (i=n-1; i>=0; i--){ 1215 k = 2*n-i; 1216 v = aa + bs2*ai[k]; 1217 vi = aj + ai[k]; 1218 nz = ai[k+1] - ai[k] - 1; 1219 ierr = PetscMemcpy(ls,t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1220 for(m=0;m<nz;m++){ 1221 Kernel_v_gets_v_minus_A_times_w(bs,ls,v,t+bs*vi[m]); 1222 v += bs2; 1223 } 1224 Kernel_w_gets_A_times_v(bs,ls,v,t+i*bs); /* *inv(diagonal[i]) */ 1225 ierr = PetscMemcpy(x + bs*c[i],t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1226 } 1227 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1228 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1229 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1230 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1231 ierr = PetscLogFlops(2.0*(a->bs2)*(a->nz) - A->rmap->bs*A->cmap->n);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "BlockAbs_privat" 1237 PetscErrorCode BlockAbs_private(PetscInt nbs,PetscInt bs2,PetscScalar *blockarray,PetscReal *absarray) 1238 { 1239 PetscErrorCode ierr; 1240 PetscInt i,j; 1241 PetscFunctionBegin; 1242 ierr = PetscMemzero(absarray,(nbs+1)*sizeof(PetscReal));CHKERRQ(ierr); 1243 for (i=0; i<nbs; i++){ 1244 for (j=0; j<bs2; j++){ 1245 if (absarray[i] < PetscAbsScalar(blockarray[i*nbs+j])) absarray[i] = PetscAbsScalar(blockarray[i*nbs+j]); 1246 } 1247 } 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatILUDTFactor_SeqBAIJ" 1253 PetscErrorCode MatILUDTFactor_SeqBAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 1254 { 1255 Mat B = *fact; 1256 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b; 1257 IS isicol; 1258 PetscErrorCode ierr; 1259 const PetscInt *r,*ic; 1260 PetscInt i,mbs=a->mbs,bs=A->rmap->bs,bs2=a->bs2,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 1261 PetscInt *bi,*bj,*bdiag; 1262 1263 PetscInt row,nzi,nzi_bl,nzi_bu,*im,dtcount,nzi_al,nzi_au; 1264 PetscInt nlnk,*lnk; 1265 PetscBT lnkbt; 1266 PetscTruth row_identity,icol_identity,both_identity; 1267 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,*multiplier,*vtmp; 1268 const PetscInt *ics; 1269 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 1270 1271 PetscReal dt=info->dt; /* shift=info->shiftinblocks; */ 1272 PetscInt nnz_max; 1273 PetscTruth missing; 1274 PetscReal *vtmp_abs; 1275 MatScalar *v_work; 1276 PetscInt *v_pivots; 1277 1278 PetscFunctionBegin; 1279 /* ------- symbolic factorization, can be reused ---------*/ 1280 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1281 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1282 adiag=a->diag; 1283 1284 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1285 1286 /* bdiag is location of diagonal in factor */ 1287 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1288 1289 /* allocate row pointers bi */ 1290 ierr = PetscMalloc((2*mbs+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1291 1292 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 1293 dtcount = (PetscInt)info->dtcount; 1294 if (dtcount > mbs-1) dtcount = mbs-1; 1295 nnz_max = ai[mbs]+2*mbs*dtcount +2; 1296 /* printf("MatILUDTFactor_SeqBAIJ, bs %d, ai[mbs] %d, nnz_max %d, dtcount %d\n",bs,ai[mbs],nnz_max,dtcount); */ 1297 ierr = PetscMalloc(nnz_max*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1298 nnz_max = nnz_max*bs2; 1299 ierr = PetscMalloc(nnz_max*sizeof(MatScalar),&ba);CHKERRQ(ierr); 1300 1301 /* put together the new matrix */ 1302 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1303 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 1304 b = (Mat_SeqBAIJ*)(B)->data; 1305 b->free_a = PETSC_TRUE; 1306 b->free_ij = PETSC_TRUE; 1307 b->singlemalloc = PETSC_FALSE; 1308 b->a = ba; 1309 b->j = bj; 1310 b->i = bi; 1311 b->diag = bdiag; 1312 b->ilen = 0; 1313 b->imax = 0; 1314 b->row = isrow; 1315 b->col = iscol; 1316 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1317 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1318 b->icol = isicol; 1319 ierr = PetscMalloc((bs*(mbs+1))*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1320 1321 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1322 b->maxnz = nnz_max/bs2; 1323 1324 (B)->factor = MAT_FACTOR_ILUDT; 1325 (B)->info.factor_mallocs = 0; 1326 (B)->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)(ai[mbs]*bs2)); 1327 CHKMEMQ; 1328 /* ------- end of symbolic factorization ---------*/ 1329 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1330 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1331 ics = ic; 1332 1333 /* linked list for storing column indices of the active row */ 1334 nlnk = mbs + 1; 1335 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1336 1337 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 1338 ierr = PetscMalloc((2*mbs+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 1339 jtmp = im + mbs; 1340 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 1341 ierr = PetscMalloc((2*mbs*bs2+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1342 vtmp = rtmp + bs2*mbs; 1343 ierr = PetscMalloc((mbs+1)*sizeof(PetscReal),&vtmp_abs);CHKERRQ(ierr); 1344 1345 ierr = PetscMalloc(bs*sizeof(PetscInt) + (bs+bs2)*sizeof(MatScalar),&v_work);CHKERRQ(ierr); 1346 multiplier = v_work + bs; 1347 v_pivots = (PetscInt*)(multiplier + bs2); 1348 1349 bi[0] = 0; 1350 bdiag[0] = (nnz_max/bs2)-1; /* location of diagonal in factor B */ 1351 bi[2*mbs+1] = bdiag[0]+1; /* endof bj and ba array */ 1352 for (i=0; i<mbs; i++) { 1353 /* copy initial fill into linked list */ 1354 nzi = 0; /* nonzeros for active row i */ 1355 nzi = ai[r[i]+1] - ai[r[i]]; 1356 if (!nzi) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1357 nzi_al = adiag[r[i]] - ai[r[i]]; 1358 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 1359 /* printf("row %d, nzi_al/au %d %d\n",i,nzi_al,nzi_au); */ 1360 1361 /* load in initial unfactored row */ 1362 ajtmp = aj + ai[r[i]]; 1363 ierr = PetscLLAddPerm(nzi,ajtmp,ic,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1364 ierr = PetscMemzero(rtmp,(mbs*bs2+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1365 aatmp = a->a + bs2*ai[r[i]]; 1366 for (j=0; j<nzi; j++) { 1367 ierr = PetscMemcpy(rtmp+bs2*ic[ajtmp[j]],aatmp+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1368 } 1369 1370 /* add pivot rows into linked list */ 1371 row = lnk[mbs]; 1372 while (row < i) { 1373 nzi_bl = bi[row+1] - bi[row] + 1; 1374 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 1375 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 1376 nzi += nlnk; 1377 row = lnk[row]; 1378 } 1379 1380 /* copy data from lnk into jtmp, then initialize lnk */ 1381 ierr = PetscLLClean(mbs,mbs,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 1382 1383 /* numerical factorization */ 1384 bjtmp = jtmp; 1385 row = *bjtmp++; /* 1st pivot row */ 1386 1387 while (row < i) { 1388 pc = rtmp + bs2*row; 1389 pv = ba + bs2*bdiag[row]; /* inv(diag) of the pivot row */ 1390 Kernel_A_gets_A_times_B(bs,pc,pv,multiplier); /* pc= multiplier = pc*inv(diag[row]) */ 1391 ierr = BlockAbs_private(1,bs2,pc,vtmp_abs);CHKERRQ(ierr); 1392 if (vtmp_abs[0] > dt){ /* apply tolerance dropping rule */ 1393 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 1394 pv = ba + bs2*(bdiag[row+1] + 1); 1395 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 1396 for (j=0; j<nz; j++){ 1397 Kernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); 1398 } 1399 /* ierr = PetscLogFlops(bslog*(nz+1.0)-bs);CHKERRQ(ierr); */ 1400 } 1401 row = *bjtmp++; 1402 } 1403 1404 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 1405 nzi_bl = 0; j = 0; 1406 while (jtmp[j] < i){ /* L-part. Note: jtmp is sorted */ 1407 ierr = PetscMemcpy(vtmp+bs2*j,rtmp+bs2*jtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 1408 nzi_bl++; j++; 1409 } 1410 nzi_bu = nzi - nzi_bl -1; 1411 /* printf("nzi %d, nzi_bl %d, nzi_bu %d\n",nzi,nzi_bl,nzi_bu); */ 1412 1413 while (j < nzi){ /* U-part */ 1414 ierr = PetscMemcpy(vtmp+bs2*j,rtmp+bs2*jtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 1415 /* 1416 printf(" col %d: ",jtmp[j]); 1417 for (j1=0; j1<bs2; j1++) printf(" %g",*(vtmp+bs2*j+j1)); 1418 printf(" \n"); 1419 */ 1420 j++; 1421 } 1422 1423 ierr = BlockAbs_private(nzi,bs2,vtmp,vtmp_abs);CHKERRQ(ierr); 1424 /* 1425 printf(" row %d, nzi %d, vtmp_abs\n",i,nzi); 1426 for (j1=0; j1<nzi; j1++) printf(" (%d %g),",jtmp[j1],vtmp_abs[j1]); 1427 printf(" \n"); 1428 */ 1429 bjtmp = bj + bi[i]; 1430 batmp = ba + bs2*bi[i]; 1431 /* apply level dropping rule to L part */ 1432 ncut = nzi_al + dtcount; 1433 if (ncut < nzi_bl){ 1434 ierr = PetscSortSplitReal(ncut,nzi_bl,vtmp_abs,jtmp);CHKERRQ(ierr); 1435 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 1436 } else { 1437 ncut = nzi_bl; 1438 } 1439 for (j=0; j<ncut; j++){ 1440 bjtmp[j] = jtmp[j]; 1441 ierr = PetscMemcpy(batmp+bs2*j,rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 1442 /* 1443 printf(" col %d: ",bjtmp[j]); 1444 for (j1=0; j1<bs2; j1++) printf(" %g,",*(batmp+bs2*j+j1)); 1445 printf("\n"); 1446 */ 1447 } 1448 bi[i+1] = bi[i] + ncut; 1449 nzi = ncut + 1; 1450 1451 /* apply level dropping rule to U part */ 1452 ncut = nzi_au + dtcount; 1453 if (ncut < nzi_bu){ 1454 ierr = PetscSortSplitReal(ncut,nzi_bu,vtmp_abs+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 1455 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 1456 } else { 1457 ncut = nzi_bu; 1458 } 1459 nzi += ncut; 1460 1461 /* mark bdiagonal */ 1462 bdiag[i+1] = bdiag[i] - (ncut + 1); 1463 bi[2*mbs - i] = bi[2*mbs - i +1] - (ncut + 1); 1464 1465 bjtmp = bj + bdiag[i]; 1466 batmp = ba + bs2*bdiag[i]; 1467 ierr = PetscMemcpy(batmp,rtmp+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1468 *bjtmp = i; 1469 /* 1470 printf(" diag %d: ",*bjtmp); 1471 for (j=0; j<bs2; j++){ 1472 printf(" %g,",batmp[j]); 1473 } 1474 printf("\n"); 1475 */ 1476 bjtmp = bj + bdiag[i+1]+1; 1477 batmp = ba + (bdiag[i+1]+1)*bs2; 1478 1479 for (k=0; k<ncut; k++){ 1480 bjtmp[k] = jtmp[nzi_bl+1+k]; 1481 ierr = PetscMemcpy(batmp+bs2*k,rtmp+bs2*bjtmp[k],bs2*sizeof(MatScalar));CHKERRQ(ierr); 1482 /* 1483 printf(" col %d:",bjtmp[k]); 1484 for (j1=0; j1<bs2; j1++) printf(" %g,",*(batmp+bs2*k+j1)); 1485 printf("\n"); 1486 */ 1487 } 1488 1489 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 1490 1491 /* invert diagonal block for simplier triangular solves - add shift??? */ 1492 batmp = ba + bs2*bdiag[i]; 1493 ierr = Kernel_A_gets_inverse_A(bs,batmp,v_pivots,v_work);CHKERRQ(ierr); 1494 } /* for (i=0; i<mbs; i++) */ 1495 1496 /* printf("end of L %d, beginning of U %d\n",bi[mbs],bdiag[mbs]); */ 1497 if (bi[mbs] >= bdiag[mbs]) SETERRQ2(PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[mbs],bdiag[mbs]); 1498 1499 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1500 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1501 1502 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1503 1504 ierr = PetscFree(im);CHKERRQ(ierr); 1505 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1506 1507 ierr = PetscLogFlops(bs2*B->cmap->n);CHKERRQ(ierr); 1508 b->maxnz = b->nz = bi[mbs] + bdiag[0] - bdiag[mbs]; 1509 1510 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1511 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 1512 both_identity = (PetscTruth) (row_identity && icol_identity); 1513 if (row_identity && icol_identity) { 1514 B->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct; 1515 } else { 1516 B->ops->solve = MatSolve_SeqBAIJ_N_newdatastruct; 1517 } 1518 1519 B->ops->solveadd = 0; 1520 B->ops->solvetranspose = 0; 1521 B->ops->solvetransposeadd = 0; 1522 B->ops->matsolve = 0; 1523 B->assembled = PETSC_TRUE; 1524 B->preallocated = PETSC_TRUE; 1525 PetscFunctionReturn(0); 1526 } 1527