1 /* 2 Factorization code for BAIJ format. 3 */ 4 #include "src/mat/impls/baij/seq/baij.h" 5 #include "src/inline/ilu.h" 6 7 /* ------------------------------------------------------------*/ 8 /* 9 Version for when blocks are 2 by 2 10 */ 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2" 13 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B) 14 { 15 Mat C = *B; 16 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 17 IS isrow = b->row,isicol = b->icol; 18 PetscErrorCode ierr; 19 PetscInt *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 20 PetscInt *ajtmpold,*ajtmp,nz,row; 21 PetscInt *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj; 22 MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4; 23 MatScalar p1,p2,p3,p4; 24 MatScalar *ba = b->a,*aa = a->a; 25 26 PetscFunctionBegin; 27 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 28 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 29 ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 30 31 for (i=0; i<n; i++) { 32 nz = bi[i+1] - bi[i]; 33 ajtmp = bj + bi[i]; 34 for (j=0; j<nz; j++) { 35 x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0; 36 } 37 /* load in initial (unfactored row) */ 38 idx = r[i]; 39 nz = ai[idx+1] - ai[idx]; 40 ajtmpold = aj + ai[idx]; 41 v = aa + 4*ai[idx]; 42 for (j=0; j<nz; j++) { 43 x = rtmp+4*ic[ajtmpold[j]]; 44 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 45 v += 4; 46 } 47 row = *ajtmp++; 48 while (row < i) { 49 pc = rtmp + 4*row; 50 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 51 if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 52 pv = ba + 4*diag_offset[row]; 53 pj = bj + diag_offset[row] + 1; 54 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 55 pc[0] = m1 = p1*x1 + p3*x2; 56 pc[1] = m2 = p2*x1 + p4*x2; 57 pc[2] = m3 = p1*x3 + p3*x4; 58 pc[3] = m4 = p2*x3 + p4*x4; 59 nz = bi[row+1] - diag_offset[row] - 1; 60 pv += 4; 61 for (j=0; j<nz; j++) { 62 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 63 x = rtmp + 4*pj[j]; 64 x[0] -= m1*x1 + m3*x2; 65 x[1] -= m2*x1 + m4*x2; 66 x[2] -= m1*x3 + m3*x4; 67 x[3] -= m2*x3 + m4*x4; 68 pv += 4; 69 } 70 PetscLogFlops(16*nz+12); 71 } 72 row = *ajtmp++; 73 } 74 /* finished row so stick it into b->a */ 75 pv = ba + 4*bi[i]; 76 pj = bj + bi[i]; 77 nz = bi[i+1] - bi[i]; 78 for (j=0; j<nz; j++) { 79 x = rtmp+4*pj[j]; 80 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 81 pv += 4; 82 } 83 /* invert diagonal block */ 84 w = ba + 4*diag_offset[i]; 85 ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr); 86 } 87 88 ierr = PetscFree(rtmp);CHKERRQ(ierr); 89 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 90 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 91 C->factor = FACTOR_LU; 92 C->assembled = PETSC_TRUE; 93 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 94 PetscFunctionReturn(0); 95 } 96 /* 97 Version for when blocks are 2 by 2 Using natural ordering 98 */ 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering" 101 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B) 102 { 103 Mat C = *B; 104 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 105 PetscErrorCode ierr; 106 PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; 107 PetscInt *ajtmpold,*ajtmp,nz,row; 108 PetscInt *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; 109 MatScalar *pv,*v,*rtmp,*pc,*w,*x; 110 MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4; 111 MatScalar *ba = b->a,*aa = a->a; 112 113 PetscFunctionBegin; 114 ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 115 116 for (i=0; i<n; i++) { 117 nz = bi[i+1] - bi[i]; 118 ajtmp = bj + bi[i]; 119 for (j=0; j<nz; j++) { 120 x = rtmp+4*ajtmp[j]; 121 x[0] = x[1] = x[2] = x[3] = 0.0; 122 } 123 /* load in initial (unfactored row) */ 124 nz = ai[i+1] - ai[i]; 125 ajtmpold = aj + ai[i]; 126 v = aa + 4*ai[i]; 127 for (j=0; j<nz; j++) { 128 x = rtmp+4*ajtmpold[j]; 129 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 130 v += 4; 131 } 132 row = *ajtmp++; 133 while (row < i) { 134 pc = rtmp + 4*row; 135 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 136 if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 137 pv = ba + 4*diag_offset[row]; 138 pj = bj + diag_offset[row] + 1; 139 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 140 pc[0] = m1 = p1*x1 + p3*x2; 141 pc[1] = m2 = p2*x1 + p4*x2; 142 pc[2] = m3 = p1*x3 + p3*x4; 143 pc[3] = m4 = p2*x3 + p4*x4; 144 nz = bi[row+1] - diag_offset[row] - 1; 145 pv += 4; 146 for (j=0; j<nz; j++) { 147 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 148 x = rtmp + 4*pj[j]; 149 x[0] -= m1*x1 + m3*x2; 150 x[1] -= m2*x1 + m4*x2; 151 x[2] -= m1*x3 + m3*x4; 152 x[3] -= m2*x3 + m4*x4; 153 pv += 4; 154 } 155 PetscLogFlops(16*nz+12); 156 } 157 row = *ajtmp++; 158 } 159 /* finished row so stick it into b->a */ 160 pv = ba + 4*bi[i]; 161 pj = bj + bi[i]; 162 nz = bi[i+1] - bi[i]; 163 for (j=0; j<nz; j++) { 164 x = rtmp+4*pj[j]; 165 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 166 pv += 4; 167 } 168 /* invert diagonal block */ 169 w = ba + 4*diag_offset[i]; 170 ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr); 171 /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/ 172 } 173 174 ierr = PetscFree(rtmp);CHKERRQ(ierr); 175 C->factor = FACTOR_LU; 176 C->assembled = PETSC_TRUE; 177 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 178 PetscFunctionReturn(0); 179 } 180 181 /* ----------------------------------------------------------- */ 182 /* 183 Version for when blocks are 1 by 1. 184 */ 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1" 187 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B) 188 { 189 Mat C = *B; 190 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 191 IS isrow = b->row,isicol = b->icol; 192 PetscErrorCode ierr; 193 PetscInt *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 194 PetscInt *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j; 195 PetscInt *diag_offset = b->diag,diag,*pj; 196 MatScalar *pv,*v,*rtmp,multiplier,*pc; 197 MatScalar *ba = b->a,*aa = a->a; 198 199 PetscFunctionBegin; 200 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 201 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 202 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 203 204 for (i=0; i<n; i++) { 205 nz = bi[i+1] - bi[i]; 206 ajtmp = bj + bi[i]; 207 for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0; 208 209 /* load in initial (unfactored row) */ 210 nz = ai[r[i]+1] - ai[r[i]]; 211 ajtmpold = aj + ai[r[i]]; 212 v = aa + ai[r[i]]; 213 for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j]; 214 215 row = *ajtmp++; 216 while (row < i) { 217 pc = rtmp + row; 218 if (*pc != 0.0) { 219 pv = ba + diag_offset[row]; 220 pj = bj + diag_offset[row] + 1; 221 multiplier = *pc * *pv++; 222 *pc = multiplier; 223 nz = bi[row+1] - diag_offset[row] - 1; 224 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 225 PetscLogFlops(1+2*nz); 226 } 227 row = *ajtmp++; 228 } 229 /* finished row so stick it into b->a */ 230 pv = ba + bi[i]; 231 pj = bj + bi[i]; 232 nz = bi[i+1] - bi[i]; 233 for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];} 234 diag = diag_offset[i] - bi[i]; 235 /* check pivot entry for current row */ 236 if (pv[diag] == 0.0) { 237 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 238 } 239 pv[diag] = 1.0/pv[diag]; 240 } 241 242 ierr = PetscFree(rtmp);CHKERRQ(ierr); 243 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 245 C->factor = FACTOR_LU; 246 C->assembled = PETSC_TRUE; 247 PetscLogFlops(C->n); 248 PetscFunctionReturn(0); 249 } 250 251 252 /* ----------------------------------------------------------- */ 253 #undef __FUNCT__ 254 #define __FUNCT__ "MatLUFactor_SeqBAIJ" 255 PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 256 { 257 PetscErrorCode ierr; 258 Mat C; 259 260 PetscFunctionBegin; 261 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 262 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 263 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 264 PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol); 265 PetscFunctionReturn(0); 266 } 267 268 #include "src/mat/impls/sbaij/seq/sbaij.h" 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N" 271 PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat A,Mat *B) 272 { 273 Mat C = *B; 274 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 275 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 276 IS ip=b->row; 277 PetscErrorCode ierr,(*f)(Mat,Mat*); 278 PetscInt *rip,i,j,mbs=a->mbs,bs=A->bs,*bi=b->i,*bj=b->j,*bcol; 279 PetscInt *ai=a->i,*aj=a->j; 280 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 281 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 282 PetscReal damping=b->factor_damping,zeropivot=b->factor_zeropivot,shift_amount; 283 PetscTruth damp,chshift; 284 PetscInt nshift=0,ndamp=0; 285 286 PetscFunctionBegin; 287 if (bs > 1) { 288 if (!a->sbaijMat){ 289 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 290 } 291 ierr = PetscObjectQueryFunction((PetscObject)C,"MatCholeskyFactorNumeric",(void (**)(void))&f);CHKERRQ(ierr); 292 ierr = (*f)(a->sbaijMat,B);CHKERRQ(ierr); 293 ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 294 a->sbaijMat = PETSC_NULL; 295 PetscFunctionReturn(0); 296 } 297 298 /* initialization */ 299 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 300 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 301 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 302 jl = il + mbs; 303 rtmp = (MatScalar*)(jl + mbs); 304 305 shift_amount = 0; 306 do { 307 damp = PETSC_FALSE; 308 chshift = PETSC_FALSE; 309 for (i=0; i<mbs; i++) { 310 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 311 } 312 313 for (k = 0; k<mbs; k++){ 314 bval = ba + bi[k]; 315 /* initialize k-th row by the perm[k]-th row of A */ 316 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 317 for (j = jmin; j < jmax; j++){ 318 col = rip[aj[j]]; 319 if (col >= k){ /* only take upper triangular entry */ 320 rtmp[col] = aa[j]; 321 *bval++ = 0.0; /* for in-place factorization */ 322 } 323 } 324 /* damp the diagonal of the matrix */ 325 if (ndamp||nshift) rtmp[k] += damping+shift_amount; 326 327 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 328 dk = rtmp[k]; 329 i = jl[k]; /* first row to be added to k_th row */ 330 331 while (i < k){ 332 nexti = jl[i]; /* next row to be added to k_th row */ 333 334 /* compute multiplier, update diag(k) and U(i,k) */ 335 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 336 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 337 dk += uikdi*ba[ili]; 338 ba[ili] = uikdi; /* -U(i,k) */ 339 340 /* add multiple of row i to k-th row */ 341 jmin = ili + 1; jmax = bi[i+1]; 342 if (jmin < jmax){ 343 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 344 /* update il and jl for row i */ 345 il[i] = jmin; 346 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 347 } 348 i = nexti; 349 } 350 351 if (PetscRealPart(dk) < zeropivot && b->factor_shift){ 352 /* calculate a shift that would make this row diagonally dominant */ 353 PetscReal rs = PetscAbs(PetscRealPart(dk)); 354 jmin = bi[k]+1; 355 nz = bi[k+1] - jmin; 356 if (nz){ 357 bcol = bj + jmin; 358 bval = ba + jmin; 359 while (nz--){ 360 rs += PetscAbsScalar(rtmp[*bcol++]); 361 } 362 } 363 /* if this shift is less than the previous, just up the previous 364 one by a bit */ 365 shift_amount = PetscMax(rs,1.1*shift_amount); 366 chshift = PETSC_TRUE; 367 /* Unlike in the ILU case there is no exit condition on nshift: 368 we increase the shift until it converges. There is no guarantee that 369 this algorithm converges faster or slower, or is better or worse 370 than the ILU algorithm. */ 371 nshift++; 372 break; 373 } 374 if (PetscRealPart(dk) < zeropivot){ 375 if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1); 376 if (damping > 0.0) { 377 if (ndamp) damping *= 2.0; 378 damp = PETSC_TRUE; 379 ndamp++; 380 break; 381 } else if (PetscAbsScalar(dk) < zeropivot){ 382 SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot); 383 } else { 384 PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k); 385 } 386 } 387 388 /* copy data into U(k,:) */ 389 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 390 jmin = bi[k]+1; jmax = bi[k+1]; 391 if (jmin < jmax) { 392 for (j=jmin; j<jmax; j++){ 393 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 394 } 395 /* add the k-th row into il and jl */ 396 il[k] = jmin; 397 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 398 } 399 } 400 } while (damp||chshift); 401 ierr = PetscFree(il);CHKERRQ(ierr); 402 403 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 404 C->factor = FACTOR_CHOLESKY; 405 C->assembled = PETSC_TRUE; 406 C->preallocated = PETSC_TRUE; 407 PetscLogFlops(C->m); 408 if (ndamp) { 409 PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqBAIJ: number of damping tries %D damping value %g\n",ndamp,damping); 410 } 411 if (nshift) { 412 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ diagonal shifted %D shifts\n",nshift); 413 } 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering" 419 PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat A,Mat *fact) 420 { 421 Mat C = *fact; 422 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 423 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 424 PetscErrorCode ierr; 425 PetscInt i,j,am=a->mbs,bs=A->bs; 426 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 427 PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0; 428 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 429 PetscReal damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount; 430 PetscTruth damp,chshift; 431 PetscInt nshift=0; 432 433 PetscFunctionBegin; 434 if (bs > 1) { 435 SETERRQ(PETSC_ERR_USER,"not done yet"); 436 } 437 438 /* initialization */ 439 nz = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar); 440 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 441 jl = il + am; 442 rtmp = (MatScalar*)(jl + am); 443 444 shift_amount = 0; 445 do { 446 damp = PETSC_FALSE; 447 chshift = PETSC_FALSE; 448 for (i=0; i<am; i++) { 449 rtmp[i] = 0.0; jl[i] = am; il[0] = 0; 450 } 451 452 for (k = 0; k<am; k++){ 453 /* initialize k-th row with elements nonzero in row perm(k) of A */ 454 nz = ai[k+1] - ai[k]; 455 acol = aj + ai[k]; 456 aval = aa + ai[k]; 457 bval = ba + bi[k]; 458 while (nz -- ){ 459 if (*acol < k) { /* skip lower triangular entries */ 460 acol++; aval++; 461 } else { 462 rtmp[*acol++] = *aval++; 463 *bval++ = 0.0; /* for in-place factorization */ 464 } 465 } 466 /* damp the diagonal of the matrix */ 467 if (ndamp||nshift) rtmp[k] += damping+shift_amount; 468 469 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 470 dk = rtmp[k]; 471 i = jl[k]; /* first row to be added to k_th row */ 472 473 while (i < k){ 474 nexti = jl[i]; /* next row to be added to k_th row */ 475 /* compute multiplier, update D(k) and U(i,k) */ 476 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 477 uikdi = - ba[ili]*ba[bi[i]]; 478 dk += uikdi*ba[ili]; 479 ba[ili] = uikdi; /* -U(i,k) */ 480 481 /* add multiple of row i to k-th row ... */ 482 jmin = ili + 1; 483 nz = bi[i+1] - jmin; 484 if (nz > 0){ 485 bcol = bj + jmin; 486 bval = ba + jmin; 487 while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 488 /* update il and jl for i-th row */ 489 il[i] = jmin; 490 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 491 } 492 i = nexti; 493 } 494 495 if (PetscRealPart(dk) < zeropivot && b->factor_shift){ 496 /* calculate a shift that would make this row diagonally dominant */ 497 PetscReal rs = PetscAbs(PetscRealPart(dk)); 498 jmin = bi[k]+1; 499 nz = bi[k+1] - jmin; 500 if (nz){ 501 bcol = bj + jmin; 502 bval = ba + jmin; 503 while (nz--){ 504 rs += PetscAbsScalar(rtmp[*bcol++]); 505 } 506 } 507 /* if this shift is less than the previous, just up the previous 508 one by a bit */ 509 shift_amount = PetscMax(rs,1.1*shift_amount); 510 chshift = PETSC_TRUE; 511 /* Unlike in the ILU case there is no exit condition on nshift: 512 we increase the shift until it converges. There is no guarantee that 513 this algorithm converges faster or slower, or is better or worse 514 than the ILU algorithm. */ 515 nshift++; 516 break; 517 } 518 if (PetscRealPart(dk) < zeropivot){ 519 if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1); 520 if (damping > 0.0) { 521 if (ndamp) damping *= 2.0; 522 damp = PETSC_TRUE; 523 ndamp++; 524 break; 525 } else if (PetscAbsScalar(dk) < zeropivot){ 526 SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot); 527 } else { 528 PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k); 529 } 530 } 531 532 /* copy data into U(k,:) */ 533 ba[bi[k]] = 1.0/dk; 534 jmin = bi[k]+1; 535 nz = bi[k+1] - jmin; 536 if (nz){ 537 bcol = bj + jmin; 538 bval = ba + jmin; 539 while (nz--){ 540 *bval++ = rtmp[*bcol]; 541 rtmp[*bcol++] = 0.0; 542 } 543 /* add k-th row into il and jl */ 544 il[k] = jmin; 545 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 546 } 547 } 548 } while (damp||chshift); 549 ierr = PetscFree(il);CHKERRQ(ierr); 550 551 C->factor = FACTOR_CHOLESKY; 552 C->assembled = PETSC_TRUE; 553 C->preallocated = PETSC_TRUE; 554 PetscLogFlops(C->m); 555 if (ndamp) { 556 PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqBAIJ_1_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping); 557 } 558 if (nshift) { 559 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqBAIJ_1_NaturalOrdering diagonal shifted %D shifts\n",nshift); 560 } 561 PetscFunctionReturn(0); 562 } 563 564 #include "petscbt.h" 565 #include "src/mat/utils/freespace.h" 566 #undef __FUNCT__ 567 #define __FUNCT__ "MatICCFactorSymbolic_SeqBAIJ" 568 PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 569 { 570 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 571 Mat_SeqSBAIJ *b; 572 Mat B; 573 PetscErrorCode ierr; 574 PetscTruth perm_identity; 575 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=a->mbs,bs=A->bs,*ui; 576 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 577 PetscInt nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 578 PetscReal fill=info->fill,levels=info->levels; 579 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 580 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 581 PetscBT lnkbt; 582 583 PetscFunctionBegin; 584 if (bs > 1){ 585 if (!a->sbaijMat){ 586 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 587 } 588 ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 589 B = *fact; 590 ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 591 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 592 PetscFunctionReturn(0); 593 } 594 595 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 596 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 597 598 /* special case that simply copies fill pattern */ 599 if (!levels && perm_identity) { 600 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 601 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 602 for (i=0; i<am; i++) { 603 ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */ 604 } 605 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 606 B = *fact; 607 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 608 ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr); 609 610 b = (Mat_SeqSBAIJ*)B->data; 611 uj = b->j; 612 for (i=0; i<am; i++) { 613 aj = a->j + a->diag[i]; 614 for (j=0; j<ui[i]; j++){ 615 *uj++ = *aj++; 616 } 617 b->ilen[i] = ui[i]; 618 } 619 ierr = PetscFree(ui);CHKERRQ(ierr); 620 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 621 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 622 623 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 624 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 625 ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 626 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 627 PetscFunctionReturn(0); 628 } 629 630 /* initialization */ 631 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 632 ui[0] = 0; 633 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 634 635 /* jl: linked list for storing indices of the pivot rows 636 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 637 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 638 il = jl + am; 639 uj_ptr = (PetscInt**)(il + am); 640 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 641 for (i=0; i<am; i++){ 642 jl[i] = am; il[i] = 0; 643 } 644 645 /* create and initialize a linked list for storing column indices of the active row k */ 646 nlnk = am + 1; 647 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 648 649 /* initial FreeSpace size is fill*(ai[am]+1) */ 650 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 651 current_space = free_space; 652 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 653 current_space_lvl = free_space_lvl; 654 655 for (k=0; k<am; k++){ /* for each active row k */ 656 /* initialize lnk by the column indices of row rip[k] of A */ 657 nzk = 0; 658 ncols = ai[rip[k]+1] - ai[rip[k]]; 659 ncols_upper = 0; 660 cols = cols_lvl + am; 661 for (j=0; j<ncols; j++){ 662 i = rip[*(aj + ai[rip[k]] + j)]; 663 if (i >= k){ /* only take upper triangular entry */ 664 cols[ncols_upper] = i; 665 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 666 ncols_upper++; 667 } 668 } 669 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 670 nzk += nlnk; 671 672 /* update lnk by computing fill-in for each pivot row to be merged in */ 673 prow = jl[k]; /* 1st pivot row */ 674 675 while (prow < k){ 676 nextprow = jl[prow]; 677 678 /* merge prow into k-th row */ 679 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 680 jmax = ui[prow+1]; 681 ncols = jmax-jmin; 682 i = jmin - ui[prow]; 683 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 684 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 685 ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 686 nzk += nlnk; 687 688 /* update il and jl for prow */ 689 if (jmin < jmax){ 690 il[prow] = jmin; 691 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 692 } 693 prow = nextprow; 694 } 695 696 /* if free space is not available, make more free space */ 697 if (current_space->local_remaining<nzk) { 698 i = am - k + 1; /* num of unfactored rows */ 699 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 700 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 701 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 702 reallocs++; 703 } 704 705 /* copy data into free_space and free_space_lvl, then initialize lnk */ 706 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 707 708 /* add the k-th row into il and jl */ 709 if (nzk-1 > 0){ 710 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 711 jl[k] = jl[i]; jl[i] = k; 712 il[k] = ui[k] + 1; 713 } 714 uj_ptr[k] = current_space->array; 715 uj_lvl_ptr[k] = current_space_lvl->array; 716 717 current_space->array += nzk; 718 current_space->local_used += nzk; 719 current_space->local_remaining -= nzk; 720 721 current_space_lvl->array += nzk; 722 current_space_lvl->local_used += nzk; 723 current_space_lvl->local_remaining -= nzk; 724 725 ui[k+1] = ui[k] + nzk; 726 } 727 728 if (ai[am] != 0) { 729 PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]); 730 PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 731 PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 732 PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 733 } else { 734 PetscLogInfo(A,"MatICCFactorSymbolic_SeqBAIJ:Empty matrix.\n"); 735 } 736 737 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 738 ierr = PetscFree(jl);CHKERRQ(ierr); 739 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 740 741 /* destroy list of free space and other temporary array(s) */ 742 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 743 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 744 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 745 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 746 747 /* put together the new matrix in MATSEQSBAIJ format */ 748 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 749 B = *fact; 750 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 751 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 752 753 b = (Mat_SeqSBAIJ*)B->data; 754 ierr = PetscFree(b->imax);CHKERRQ(ierr); 755 b->singlemalloc = PETSC_FALSE; 756 /* the next line frees the default space generated by the Create() */ 757 ierr = PetscFree(b->a);CHKERRQ(ierr); 758 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 759 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 760 b->j = uj; 761 b->i = ui; 762 b->diag = 0; 763 b->ilen = 0; 764 b->imax = 0; 765 b->row = perm; 766 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 767 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 768 b->icol = perm; 769 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 770 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 771 PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 772 b->maxnz = b->nz = ui[am]; 773 774 B->factor = FACTOR_CHOLESKY; 775 B->info.factor_mallocs = reallocs; 776 B->info.fill_ratio_given = fill; 777 if (ai[am] != 0) { 778 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 779 } else { 780 B->info.fill_ratio_needed = 0.0; 781 } 782 if (perm_identity){ 783 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 784 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 785 ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 786 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 787 } else { 788 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 789 } 790 PetscFunctionReturn(0); 791 } 792 793 #undef __FUNCT__ 794 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqBAIJ" 795 PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 796 { 797 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 798 Mat_SeqSBAIJ *b; 799 Mat B; 800 PetscErrorCode ierr; 801 PetscTruth perm_identity; 802 PetscReal fill = info->fill; 803 PetscInt *rip,*riip,i,mbs=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j,reallocs=0,prow; 804 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 805 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 806 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 807 PetscBT lnkbt; 808 IS iperm; 809 810 PetscFunctionBegin; 811 if (bs > 1) { /* convert to seqsbaij */ 812 if (!a->sbaijMat){ 813 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 814 } 815 ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 816 B = *fact; 817 ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 818 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 819 PetscFunctionReturn(0); 820 } 821 822 /* check whether perm is the identity mapping */ 823 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 824 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 825 826 if (!perm_identity){ 827 /* check if perm is symmetric! */ 828 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 829 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 830 for (i=0; i<mbs; i++) { 831 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 832 } 833 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 834 ierr = ISDestroy(iperm);CHKERRQ(ierr); 835 } 836 837 /* initialization */ 838 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 839 ui[0] = 0; 840 841 /* jl: linked list for storing indices of the pivot rows 842 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 843 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 844 il = jl + mbs; 845 cols = il + mbs; 846 ui_ptr = (PetscInt**)(cols + mbs); 847 for (i=0; i<mbs; i++){ 848 jl[i] = mbs; il[i] = 0; 849 } 850 851 /* create and initialize a linked list for storing column indices of the active row k */ 852 nlnk = mbs + 1; 853 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 854 855 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 856 ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 857 current_space = free_space; 858 859 for (k=0; k<mbs; k++){ /* for each active row k */ 860 /* initialize lnk by the column indices of row rip[k] of A */ 861 nzk = 0; 862 ncols = ai[rip[k]+1] - ai[rip[k]]; 863 ncols_upper = 0; 864 for (j=0; j<ncols; j++){ 865 i = rip[*(aj + ai[rip[k]] + j)]; 866 if (i >= k){ /* only take upper triangular entry */ 867 cols[ncols_upper] = i; 868 ncols_upper++; 869 } 870 } 871 ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 872 nzk += nlnk; 873 874 /* update lnk by computing fill-in for each pivot row to be merged in */ 875 prow = jl[k]; /* 1st pivot row */ 876 877 while (prow < k){ 878 nextprow = jl[prow]; 879 /* merge prow into k-th row */ 880 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 881 jmax = ui[prow+1]; 882 ncols = jmax-jmin; 883 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 884 ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 885 nzk += nlnk; 886 887 /* update il and jl for prow */ 888 if (jmin < jmax){ 889 il[prow] = jmin; 890 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 891 } 892 prow = nextprow; 893 } 894 895 /* if free space is not available, make more free space */ 896 if (current_space->local_remaining<nzk) { 897 i = mbs - k + 1; /* num of unfactored rows */ 898 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 899 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 900 reallocs++; 901 } 902 903 /* copy data into free space, then initialize lnk */ 904 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 905 906 /* add the k-th row into il and jl */ 907 if (nzk-1 > 0){ 908 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 909 jl[k] = jl[i]; jl[i] = k; 910 il[k] = ui[k] + 1; 911 } 912 ui_ptr[k] = current_space->array; 913 current_space->array += nzk; 914 current_space->local_used += nzk; 915 current_space->local_remaining -= nzk; 916 917 ui[k+1] = ui[k] + nzk; 918 } 919 920 if (ai[mbs] != 0) { 921 PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 922 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 923 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 924 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 925 } else { 926 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 927 } 928 929 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 930 ierr = PetscFree(jl);CHKERRQ(ierr); 931 932 /* destroy list of free space and other temporary array(s) */ 933 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 934 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 935 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 936 937 /* put together the new matrix in MATSEQSBAIJ format */ 938 ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr); 939 B = *fact; 940 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 941 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,PETSC_NULL);CHKERRQ(ierr); 942 943 b = (Mat_SeqSBAIJ*)B->data; 944 ierr = PetscFree(b->imax);CHKERRQ(ierr); 945 b->singlemalloc = PETSC_FALSE; 946 /* the next line frees the default space generated by the Create() */ 947 ierr = PetscFree(b->a);CHKERRQ(ierr); 948 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 949 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 950 b->j = uj; 951 b->i = ui; 952 b->diag = 0; 953 b->ilen = 0; 954 b->imax = 0; 955 b->row = perm; 956 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 957 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 958 b->icol = perm; 959 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 960 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 961 PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 962 b->maxnz = b->nz = ui[mbs]; 963 964 B->factor = FACTOR_CHOLESKY; 965 B->info.factor_mallocs = reallocs; 966 B->info.fill_ratio_given = fill; 967 if (ai[mbs] != 0) { 968 B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 969 } else { 970 B->info.fill_ratio_needed = 0.0; 971 } 972 if (perm_identity){ 973 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 974 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 975 ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 976 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 977 } else { 978 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 979 } 980 PetscFunctionReturn(0); 981 } 982