1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for BAIJ format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" 7 #include "src/inline/ilu.h" 8 9 /* ------------------------------------------------------------*/ 10 /* 11 Version for when blocks are 2 by 2 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2" 15 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat A,MatFactorInfo *info,Mat *B) 16 { 17 Mat C = *B; 18 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 19 IS isrow = b->row,isicol = b->icol; 20 PetscErrorCode ierr; 21 PetscInt *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 22 PetscInt *ajtmpold,*ajtmp,nz,row; 23 PetscInt *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj; 24 MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4; 25 MatScalar p1,p2,p3,p4; 26 MatScalar *ba = b->a,*aa = a->a; 27 PetscReal shift = info->shiftinblocks; 28 29 PetscFunctionBegin; 30 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 31 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 32 ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 33 34 for (i=0; i<n; i++) { 35 nz = bi[i+1] - bi[i]; 36 ajtmp = bj + bi[i]; 37 for (j=0; j<nz; j++) { 38 x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0; 39 } 40 /* load in initial (unfactored row) */ 41 idx = r[i]; 42 nz = ai[idx+1] - ai[idx]; 43 ajtmpold = aj + ai[idx]; 44 v = aa + 4*ai[idx]; 45 for (j=0; j<nz; j++) { 46 x = rtmp+4*ic[ajtmpold[j]]; 47 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 48 v += 4; 49 } 50 row = *ajtmp++; 51 while (row < i) { 52 pc = rtmp + 4*row; 53 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 54 if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 55 pv = ba + 4*diag_offset[row]; 56 pj = bj + diag_offset[row] + 1; 57 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 58 pc[0] = m1 = p1*x1 + p3*x2; 59 pc[1] = m2 = p2*x1 + p4*x2; 60 pc[2] = m3 = p1*x3 + p3*x4; 61 pc[3] = m4 = p2*x3 + p4*x4; 62 nz = bi[row+1] - diag_offset[row] - 1; 63 pv += 4; 64 for (j=0; j<nz; j++) { 65 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 66 x = rtmp + 4*pj[j]; 67 x[0] -= m1*x1 + m3*x2; 68 x[1] -= m2*x1 + m4*x2; 69 x[2] -= m1*x3 + m3*x4; 70 x[3] -= m2*x3 + m4*x4; 71 pv += 4; 72 } 73 ierr = PetscLogFlops(16*nz+12);CHKERRQ(ierr); 74 } 75 row = *ajtmp++; 76 } 77 /* finished row so stick it into b->a */ 78 pv = ba + 4*bi[i]; 79 pj = bj + bi[i]; 80 nz = bi[i+1] - bi[i]; 81 for (j=0; j<nz; j++) { 82 x = rtmp+4*pj[j]; 83 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 84 pv += 4; 85 } 86 /* invert diagonal block */ 87 w = ba + 4*diag_offset[i]; 88 ierr = Kernel_A_gets_inverse_A_2(w,shift);CHKERRQ(ierr); 89 } 90 91 ierr = PetscFree(rtmp);CHKERRQ(ierr); 92 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 93 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 94 C->factor = FACTOR_LU; 95 C->assembled = PETSC_TRUE; 96 ierr = PetscLogFlops(1.3333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 97 PetscFunctionReturn(0); 98 } 99 /* 100 Version for when blocks are 2 by 2 Using natural ordering 101 */ 102 #undef __FUNCT__ 103 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering" 104 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,MatFactorInfo *info,Mat *B) 105 { 106 Mat C = *B; 107 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 108 PetscErrorCode ierr; 109 PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; 110 PetscInt *ajtmpold,*ajtmp,nz,row; 111 PetscInt *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; 112 MatScalar *pv,*v,*rtmp,*pc,*w,*x; 113 MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4; 114 MatScalar *ba = b->a,*aa = a->a; 115 PetscReal shift = info->shiftinblocks; 116 117 PetscFunctionBegin; 118 ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 119 120 for (i=0; i<n; i++) { 121 nz = bi[i+1] - bi[i]; 122 ajtmp = bj + bi[i]; 123 for (j=0; j<nz; j++) { 124 x = rtmp+4*ajtmp[j]; 125 x[0] = x[1] = x[2] = x[3] = 0.0; 126 } 127 /* load in initial (unfactored row) */ 128 nz = ai[i+1] - ai[i]; 129 ajtmpold = aj + ai[i]; 130 v = aa + 4*ai[i]; 131 for (j=0; j<nz; j++) { 132 x = rtmp+4*ajtmpold[j]; 133 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 134 v += 4; 135 } 136 row = *ajtmp++; 137 while (row < i) { 138 pc = rtmp + 4*row; 139 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 140 if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 141 pv = ba + 4*diag_offset[row]; 142 pj = bj + diag_offset[row] + 1; 143 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 144 pc[0] = m1 = p1*x1 + p3*x2; 145 pc[1] = m2 = p2*x1 + p4*x2; 146 pc[2] = m3 = p1*x3 + p3*x4; 147 pc[3] = m4 = p2*x3 + p4*x4; 148 nz = bi[row+1] - diag_offset[row] - 1; 149 pv += 4; 150 for (j=0; j<nz; j++) { 151 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 152 x = rtmp + 4*pj[j]; 153 x[0] -= m1*x1 + m3*x2; 154 x[1] -= m2*x1 + m4*x2; 155 x[2] -= m1*x3 + m3*x4; 156 x[3] -= m2*x3 + m4*x4; 157 pv += 4; 158 } 159 ierr = PetscLogFlops(16*nz+12);CHKERRQ(ierr); 160 } 161 row = *ajtmp++; 162 } 163 /* finished row so stick it into b->a */ 164 pv = ba + 4*bi[i]; 165 pj = bj + bi[i]; 166 nz = bi[i+1] - bi[i]; 167 for (j=0; j<nz; j++) { 168 x = rtmp+4*pj[j]; 169 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 170 pv += 4; 171 } 172 /* invert diagonal block */ 173 w = ba + 4*diag_offset[i]; 174 ierr = Kernel_A_gets_inverse_A_2(w,shift);CHKERRQ(ierr); 175 } 176 177 ierr = PetscFree(rtmp);CHKERRQ(ierr); 178 C->factor = FACTOR_LU; 179 C->assembled = PETSC_TRUE; 180 ierr = PetscLogFlops(1.3333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 181 PetscFunctionReturn(0); 182 } 183 184 /* ----------------------------------------------------------- */ 185 /* 186 Version for when blocks are 1 by 1. 187 */ 188 #undef __FUNCT__ 189 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1" 190 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat A,MatFactorInfo *info,Mat *B) 191 { 192 Mat C = *B; 193 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 194 IS isrow = b->row,isicol = b->icol; 195 PetscErrorCode ierr; 196 PetscInt *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 197 PetscInt *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j; 198 PetscInt *diag_offset = b->diag,diag,*pj; 199 MatScalar *pv,*v,*rtmp,multiplier,*pc; 200 MatScalar *ba = b->a,*aa = a->a; 201 202 PetscFunctionBegin; 203 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 204 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 205 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 206 207 for (i=0; i<n; i++) { 208 nz = bi[i+1] - bi[i]; 209 ajtmp = bj + bi[i]; 210 for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0; 211 212 /* load in initial (unfactored row) */ 213 nz = ai[r[i]+1] - ai[r[i]]; 214 ajtmpold = aj + ai[r[i]]; 215 v = aa + ai[r[i]]; 216 for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j]; 217 218 row = *ajtmp++; 219 while (row < i) { 220 pc = rtmp + row; 221 if (*pc != 0.0) { 222 pv = ba + diag_offset[row]; 223 pj = bj + diag_offset[row] + 1; 224 multiplier = *pc * *pv++; 225 *pc = multiplier; 226 nz = bi[row+1] - diag_offset[row] - 1; 227 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 228 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 229 } 230 row = *ajtmp++; 231 } 232 /* finished row so stick it into b->a */ 233 pv = ba + bi[i]; 234 pj = bj + bi[i]; 235 nz = bi[i+1] - bi[i]; 236 for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];} 237 diag = diag_offset[i] - bi[i]; 238 /* check pivot entry for current row */ 239 if (pv[diag] == 0.0) { 240 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 241 } 242 pv[diag] = 1.0/pv[diag]; 243 } 244 245 ierr = PetscFree(rtmp);CHKERRQ(ierr); 246 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 247 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 248 C->factor = FACTOR_LU; 249 C->assembled = PETSC_TRUE; 250 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 251 PetscFunctionReturn(0); 252 } 253 254 #undef __FUNCT__ 255 #define __FUNCT__ "MatGetFactor_seqbaij_petsc" 256 PetscErrorCode MatGetFactor_seqbaij_petsc(Mat A,MatFactorType ftype,Mat *B) 257 { 258 PetscInt n = A->rmap.n; 259 PetscErrorCode ierr; 260 261 PetscFunctionBegin; 262 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 263 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 264 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 265 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 266 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 267 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 268 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 269 PetscFunctionReturn(0); 270 } 271 272 273 /* ----------------------------------------------------------- */ 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatLUFactor_SeqBAIJ" 276 PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 277 { 278 PetscErrorCode ierr; 279 Mat C; 280 281 PetscFunctionBegin; 282 ierr = MatGetFactor(A,"petsc",MAT_FACTOR_LU,&C);CHKERRQ(ierr); 283 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 284 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 285 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 286 ierr = PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 #include "src/mat/impls/sbaij/seq/sbaij.h" 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N" 293 PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat A,MatFactorInfo *info,Mat *B) 294 { 295 PetscErrorCode ierr; 296 Mat C = *B; 297 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 298 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 299 IS ip=b->row; 300 PetscInt *rip,i,j,mbs=a->mbs,bs=A->rmap.bs,*bi=b->i,*bj=b->j,*bcol; 301 PetscInt *ai=a->i,*aj=a->j; 302 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 303 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 304 PetscReal zeropivot,rs,shiftnz; 305 PetscReal shiftpd; 306 ChShift_Ctx sctx; 307 PetscInt newshift; 308 309 PetscFunctionBegin; 310 if (bs > 1) { 311 if (!a->sbaijMat){ 312 ierr = MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);CHKERRQ(ierr); 313 } 314 ierr = (a->sbaijMat)->ops->choleskyfactornumeric(a->sbaijMat,info,B);CHKERRQ(ierr); 315 ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 316 a->sbaijMat = PETSC_NULL; 317 PetscFunctionReturn(0); 318 } 319 320 /* initialization */ 321 shiftnz = info->shiftnz; 322 shiftpd = info->shiftpd; 323 zeropivot = info->zeropivot; 324 325 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 326 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 327 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 328 jl = il + mbs; 329 rtmp = (MatScalar*)(jl + mbs); 330 331 sctx.shift_amount = 0; 332 sctx.nshift = 0; 333 do { 334 sctx.chshift = PETSC_FALSE; 335 for (i=0; i<mbs; i++) { 336 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 337 } 338 339 for (k = 0; k<mbs; k++){ 340 bval = ba + bi[k]; 341 /* initialize k-th row by the perm[k]-th row of A */ 342 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 343 for (j = jmin; j < jmax; j++){ 344 col = rip[aj[j]]; 345 if (col >= k){ /* only take upper triangular entry */ 346 rtmp[col] = aa[j]; 347 *bval++ = 0.0; /* for in-place factorization */ 348 } 349 } 350 351 /* shift the diagonal of the matrix */ 352 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 353 354 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 355 dk = rtmp[k]; 356 i = jl[k]; /* first row to be added to k_th row */ 357 358 while (i < k){ 359 nexti = jl[i]; /* next row to be added to k_th row */ 360 361 /* compute multiplier, update diag(k) and U(i,k) */ 362 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 363 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 364 dk += uikdi*ba[ili]; 365 ba[ili] = uikdi; /* -U(i,k) */ 366 367 /* add multiple of row i to k-th row */ 368 jmin = ili + 1; jmax = bi[i+1]; 369 if (jmin < jmax){ 370 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 371 /* update il and jl for row i */ 372 il[i] = jmin; 373 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 374 } 375 i = nexti; 376 } 377 378 /* shift the diagonals when zero pivot is detected */ 379 /* compute rs=sum of abs(off-diagonal) */ 380 rs = 0.0; 381 jmin = bi[k]+1; 382 nz = bi[k+1] - jmin; 383 if (nz){ 384 bcol = bj + jmin; 385 while (nz--){ 386 rs += PetscAbsScalar(rtmp[*bcol]); 387 bcol++; 388 } 389 } 390 391 sctx.rs = rs; 392 sctx.pv = dk; 393 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 394 if (newshift == 1) break; 395 396 /* copy data into U(k,:) */ 397 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 398 jmin = bi[k]+1; jmax = bi[k+1]; 399 if (jmin < jmax) { 400 for (j=jmin; j<jmax; j++){ 401 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 402 } 403 /* add the k-th row into il and jl */ 404 il[k] = jmin; 405 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 406 } 407 } 408 } while (sctx.chshift); 409 ierr = PetscFree(il);CHKERRQ(ierr); 410 411 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 412 C->factor = FACTOR_CHOLESKY; 413 C->assembled = PETSC_TRUE; 414 C->preallocated = PETSC_TRUE; 415 ierr = PetscLogFlops(C->rmap.N);CHKERRQ(ierr); 416 if (sctx.nshift){ 417 if (shiftnz) { 418 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 419 } else if (shiftpd) { 420 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 421 } 422 } 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering" 428 PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat A,MatFactorInfo *info,Mat *fact) 429 { 430 Mat C = *fact; 431 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 432 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 433 PetscErrorCode ierr; 434 PetscInt i,j,am=a->mbs; 435 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 436 PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz; 437 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 438 PetscReal zeropivot,rs,shiftnz; 439 PetscReal shiftpd; 440 ChShift_Ctx sctx; 441 PetscInt newshift; 442 443 PetscFunctionBegin; 444 /* initialization */ 445 shiftnz = info->shiftnz; 446 shiftpd = info->shiftpd; 447 zeropivot = info->zeropivot; 448 449 nz = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar); 450 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 451 jl = il + am; 452 rtmp = (MatScalar*)(jl + am); 453 454 sctx.shift_amount = 0; 455 sctx.nshift = 0; 456 do { 457 sctx.chshift = PETSC_FALSE; 458 for (i=0; i<am; i++) { 459 rtmp[i] = 0.0; jl[i] = am; il[0] = 0; 460 } 461 462 for (k = 0; k<am; k++){ 463 /* initialize k-th row with elements nonzero in row perm(k) of A */ 464 nz = ai[k+1] - ai[k]; 465 acol = aj + ai[k]; 466 aval = aa + ai[k]; 467 bval = ba + bi[k]; 468 while (nz -- ){ 469 if (*acol < k) { /* skip lower triangular entries */ 470 acol++; aval++; 471 } else { 472 rtmp[*acol++] = *aval++; 473 *bval++ = 0.0; /* for in-place factorization */ 474 } 475 } 476 477 /* shift the diagonal of the matrix */ 478 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 479 480 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 481 dk = rtmp[k]; 482 i = jl[k]; /* first row to be added to k_th row */ 483 484 while (i < k){ 485 nexti = jl[i]; /* next row to be added to k_th row */ 486 /* compute multiplier, update D(k) and U(i,k) */ 487 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 488 uikdi = - ba[ili]*ba[bi[i]]; 489 dk += uikdi*ba[ili]; 490 ba[ili] = uikdi; /* -U(i,k) */ 491 492 /* add multiple of row i to k-th row ... */ 493 jmin = ili + 1; 494 nz = bi[i+1] - jmin; 495 if (nz > 0){ 496 bcol = bj + jmin; 497 bval = ba + jmin; 498 while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 499 /* update il and jl for i-th row */ 500 il[i] = jmin; 501 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 502 } 503 i = nexti; 504 } 505 506 /* shift the diagonals when zero pivot is detected */ 507 /* compute rs=sum of abs(off-diagonal) */ 508 rs = 0.0; 509 jmin = bi[k]+1; 510 nz = bi[k+1] - jmin; 511 if (nz){ 512 bcol = bj + jmin; 513 while (nz--){ 514 rs += PetscAbsScalar(rtmp[*bcol]); 515 bcol++; 516 } 517 } 518 519 sctx.rs = rs; 520 sctx.pv = dk; 521 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 522 if (newshift == 1) break; /* sctx.shift_amount is updated */ 523 524 /* copy data into U(k,:) */ 525 ba[bi[k]] = 1.0/dk; 526 jmin = bi[k]+1; 527 nz = bi[k+1] - jmin; 528 if (nz){ 529 bcol = bj + jmin; 530 bval = ba + jmin; 531 while (nz--){ 532 *bval++ = rtmp[*bcol]; 533 rtmp[*bcol++] = 0.0; 534 } 535 /* add k-th row into il and jl */ 536 il[k] = jmin; 537 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 538 } 539 } 540 } while (sctx.chshift); 541 ierr = PetscFree(il);CHKERRQ(ierr); 542 543 C->factor = FACTOR_CHOLESKY; 544 C->assembled = PETSC_TRUE; 545 C->preallocated = PETSC_TRUE; 546 ierr = PetscLogFlops(C->rmap.N);CHKERRQ(ierr); 547 if (sctx.nshift){ 548 if (shiftnz) { 549 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 550 } else if (shiftpd) { 551 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 552 } 553 } 554 PetscFunctionReturn(0); 555 } 556 557 #include "petscbt.h" 558 #include "src/mat/utils/freespace.h" 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatICCFactorSymbolic_SeqBAIJ" 561 PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 562 { 563 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 564 Mat_SeqSBAIJ *b; 565 Mat B; 566 PetscErrorCode ierr; 567 PetscTruth perm_identity; 568 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=a->mbs,bs=A->rmap.bs,*ui; 569 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 570 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 571 PetscReal fill=info->fill,levels=info->levels; 572 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 573 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 574 PetscBT lnkbt; 575 576 PetscFunctionBegin; 577 if (bs > 1){ 578 if (!a->sbaijMat){ 579 ierr = MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);CHKERRQ(ierr); 580 } 581 ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 582 B = *fact; 583 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 584 PetscFunctionReturn(0); 585 } 586 587 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 588 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 589 590 /* special case that simply copies fill pattern */ 591 if (!levels && perm_identity) { 592 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 593 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 594 for (i=0; i<am; i++) { 595 ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */ 596 } 597 B = *fact; 598 ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr); 599 600 601 b = (Mat_SeqSBAIJ*)B->data; 602 uj = b->j; 603 for (i=0; i<am; i++) { 604 aj = a->j + a->diag[i]; 605 for (j=0; j<ui[i]; j++){ 606 *uj++ = *aj++; 607 } 608 b->ilen[i] = ui[i]; 609 } 610 ierr = PetscFree(ui);CHKERRQ(ierr); 611 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 612 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 613 614 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 615 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 616 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 617 PetscFunctionReturn(0); 618 } 619 620 /* initialization */ 621 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 622 ui[0] = 0; 623 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 624 625 /* jl: linked list for storing indices of the pivot rows 626 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 627 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 628 il = jl + am; 629 uj_ptr = (PetscInt**)(il + am); 630 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 631 for (i=0; i<am; i++){ 632 jl[i] = am; il[i] = 0; 633 } 634 635 /* create and initialize a linked list for storing column indices of the active row k */ 636 nlnk = am + 1; 637 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 638 639 /* initial FreeSpace size is fill*(ai[am]+1) */ 640 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 641 current_space = free_space; 642 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 643 current_space_lvl = free_space_lvl; 644 645 for (k=0; k<am; k++){ /* for each active row k */ 646 /* initialize lnk by the column indices of row rip[k] of A */ 647 nzk = 0; 648 ncols = ai[rip[k]+1] - ai[rip[k]]; 649 ncols_upper = 0; 650 cols = cols_lvl + am; 651 for (j=0; j<ncols; j++){ 652 i = rip[*(aj + ai[rip[k]] + j)]; 653 if (i >= k){ /* only take upper triangular entry */ 654 cols[ncols_upper] = i; 655 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 656 ncols_upper++; 657 } 658 } 659 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 660 nzk += nlnk; 661 662 /* update lnk by computing fill-in for each pivot row to be merged in */ 663 prow = jl[k]; /* 1st pivot row */ 664 665 while (prow < k){ 666 nextprow = jl[prow]; 667 668 /* merge prow into k-th row */ 669 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 670 jmax = ui[prow+1]; 671 ncols = jmax-jmin; 672 i = jmin - ui[prow]; 673 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 674 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 675 ierr = PetscIncompleteLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 676 nzk += nlnk; 677 678 /* update il and jl for prow */ 679 if (jmin < jmax){ 680 il[prow] = jmin; 681 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 682 } 683 prow = nextprow; 684 } 685 686 /* if free space is not available, make more free space */ 687 if (current_space->local_remaining<nzk) { 688 i = am - k + 1; /* num of unfactored rows */ 689 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 690 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 691 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 692 reallocs++; 693 } 694 695 /* copy data into free_space and free_space_lvl, then initialize lnk */ 696 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 697 698 /* add the k-th row into il and jl */ 699 if (nzk-1 > 0){ 700 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 701 jl[k] = jl[i]; jl[i] = k; 702 il[k] = ui[k] + 1; 703 } 704 uj_ptr[k] = current_space->array; 705 uj_lvl_ptr[k] = current_space_lvl->array; 706 707 current_space->array += nzk; 708 current_space->local_used += nzk; 709 current_space->local_remaining -= nzk; 710 711 current_space_lvl->array += nzk; 712 current_space_lvl->local_used += nzk; 713 current_space_lvl->local_remaining -= nzk; 714 715 ui[k+1] = ui[k] + nzk; 716 } 717 718 #if defined(PETSC_USE_INFO) 719 if (ai[am] != 0) { 720 PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]); 721 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 722 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 723 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 724 } else { 725 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 726 } 727 #endif 728 729 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 730 ierr = PetscFree(jl);CHKERRQ(ierr); 731 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 732 733 /* destroy list of free space and other temporary array(s) */ 734 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 735 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 736 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 737 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 738 739 /* put together the new matrix in MATSEQSBAIJ format */ 740 B = *fact; 741 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 742 743 b = (Mat_SeqSBAIJ*)B->data; 744 b->singlemalloc = PETSC_FALSE; 745 b->free_a = PETSC_TRUE; 746 b->free_ij = PETSC_TRUE; 747 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 748 b->j = uj; 749 b->i = ui; 750 b->diag = 0; 751 b->ilen = 0; 752 b->imax = 0; 753 b->row = perm; 754 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 755 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 756 b->icol = perm; 757 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 758 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 759 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 760 b->maxnz = b->nz = ui[am]; 761 762 B->factor = FACTOR_CHOLESKY; 763 B->info.factor_mallocs = reallocs; 764 B->info.fill_ratio_given = fill; 765 if (ai[am] != 0) { 766 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 767 } else { 768 B->info.fill_ratio_needed = 0.0; 769 } 770 if (perm_identity){ 771 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 772 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 773 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 774 } else { 775 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 776 } 777 PetscFunctionReturn(0); 778 } 779 780 #undef __FUNCT__ 781 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqBAIJ" 782 PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 783 { 784 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 785 Mat_SeqSBAIJ *b; 786 Mat B; 787 PetscErrorCode ierr; 788 PetscTruth perm_identity; 789 PetscReal fill = info->fill; 790 PetscInt *rip,i,mbs=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j,reallocs=0,prow; 791 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 792 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 793 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 794 PetscBT lnkbt; 795 796 PetscFunctionBegin; 797 if (bs > 1) { /* convert to seqsbaij */ 798 if (!a->sbaijMat){ 799 ierr = MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);CHKERRQ(ierr); 800 } 801 ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 802 B = *fact; 803 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 804 PetscFunctionReturn(0); 805 } 806 807 /* check whether perm is the identity mapping */ 808 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 809 if (!perm_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 810 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 811 812 /* initialization */ 813 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 814 ui[0] = 0; 815 816 /* jl: linked list for storing indices of the pivot rows 817 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 818 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 819 il = jl + mbs; 820 cols = il + mbs; 821 ui_ptr = (PetscInt**)(cols + mbs); 822 for (i=0; i<mbs; i++){ 823 jl[i] = mbs; il[i] = 0; 824 } 825 826 /* create and initialize a linked list for storing column indices of the active row k */ 827 nlnk = mbs + 1; 828 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 829 830 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 831 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 832 current_space = free_space; 833 834 for (k=0; k<mbs; k++){ /* for each active row k */ 835 /* initialize lnk by the column indices of row rip[k] of A */ 836 nzk = 0; 837 ncols = ai[rip[k]+1] - ai[rip[k]]; 838 ncols_upper = 0; 839 for (j=0; j<ncols; j++){ 840 i = rip[*(aj + ai[rip[k]] + j)]; 841 if (i >= k){ /* only take upper triangular entry */ 842 cols[ncols_upper] = i; 843 ncols_upper++; 844 } 845 } 846 ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 847 nzk += nlnk; 848 849 /* update lnk by computing fill-in for each pivot row to be merged in */ 850 prow = jl[k]; /* 1st pivot row */ 851 852 while (prow < k){ 853 nextprow = jl[prow]; 854 /* merge prow into k-th row */ 855 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 856 jmax = ui[prow+1]; 857 ncols = jmax-jmin; 858 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 859 ierr = PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 860 nzk += nlnk; 861 862 /* update il and jl for prow */ 863 if (jmin < jmax){ 864 il[prow] = jmin; 865 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 866 } 867 prow = nextprow; 868 } 869 870 /* if free space is not available, make more free space */ 871 if (current_space->local_remaining<nzk) { 872 i = mbs - k + 1; /* num of unfactored rows */ 873 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 874 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 875 reallocs++; 876 } 877 878 /* copy data into free space, then initialize lnk */ 879 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 880 881 /* add the k-th row into il and jl */ 882 if (nzk-1 > 0){ 883 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 884 jl[k] = jl[i]; jl[i] = k; 885 il[k] = ui[k] + 1; 886 } 887 ui_ptr[k] = current_space->array; 888 current_space->array += nzk; 889 current_space->local_used += nzk; 890 current_space->local_remaining -= nzk; 891 892 ui[k+1] = ui[k] + nzk; 893 } 894 895 #if defined(PETSC_USE_INFO) 896 if (ai[mbs] != 0) { 897 PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 898 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 899 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 900 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 901 } else { 902 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 903 } 904 #endif 905 906 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 907 ierr = PetscFree(jl);CHKERRQ(ierr); 908 909 /* destroy list of free space and other temporary array(s) */ 910 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 911 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 912 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 913 914 /* put together the new matrix in MATSEQSBAIJ format */ 915 B = *fact; 916 ierr = MatSeqSBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 917 918 b = (Mat_SeqSBAIJ*)B->data; 919 b->singlemalloc = PETSC_FALSE; 920 b->free_a = PETSC_TRUE; 921 b->free_ij = PETSC_TRUE; 922 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 923 b->j = uj; 924 b->i = ui; 925 b->diag = 0; 926 b->ilen = 0; 927 b->imax = 0; 928 b->row = perm; 929 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 930 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 931 b->icol = perm; 932 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 933 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 934 ierr = PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 935 b->maxnz = b->nz = ui[mbs]; 936 937 B->factor = FACTOR_CHOLESKY; 938 B->info.factor_mallocs = reallocs; 939 B->info.fill_ratio_given = fill; 940 if (ai[mbs] != 0) { 941 B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 942 } else { 943 B->info.fill_ratio_needed = 0.0; 944 } 945 if (perm_identity){ 946 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 947 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 948 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 949 } else { 950 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 951 } 952 PetscFunctionReturn(0); 953 } 954