1 2 #include "src/mat/impls/baij/seq/baij.h" 3 #include "src/mat/impls/sbaij/seq/sbaij.h" 4 #include "src/inline/ilu.h" 5 #include "include/petscis.h" 6 7 #if !defined(PETSC_USE_COMPLEX) 8 /* 9 input: 10 F -- numeric factor 11 output: 12 nneg, nzero, npos: matrix inertia 13 */ 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatGetInertia_SeqSBAIJ" 17 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,PetscInt *nneig,PetscInt *nzero,PetscInt *npos) 18 { 19 Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data; 20 PetscScalar *dd = fact_ptr->a; 21 PetscInt mbs=fact_ptr->mbs,bs=F->bs,i,nneig_tmp,npos_tmp,*fi = fact_ptr->i; 22 23 PetscFunctionBegin; 24 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"No support for bs: %D >1 yet",bs); 25 nneig_tmp = 0; npos_tmp = 0; 26 for (i=0; i<mbs; i++){ 27 if (PetscRealPart(dd[*fi]) > 0.0){ 28 npos_tmp++; 29 } else if (PetscRealPart(dd[*fi]) < 0.0){ 30 nneig_tmp++; 31 } 32 fi++; 33 } 34 if (nneig) *nneig = nneig_tmp; 35 if (npos) *npos = npos_tmp; 36 if (nzero) *nzero = mbs - nneig_tmp - npos_tmp; 37 38 PetscFunctionReturn(0); 39 } 40 #endif /* !defined(PETSC_USE_COMPLEX) */ 41 42 /* 43 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 44 Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad. 45 */ 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ_MSR" 48 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B) 49 { 50 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 51 PetscErrorCode ierr; 52 PetscInt *rip,i,mbs = a->mbs,*ai,*aj; 53 PetscInt *jutmp,bs = A->bs,bs2=a->bs2; 54 PetscInt m,reallocs = 0,prow; 55 PetscInt *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 56 PetscReal f = info->fill; 57 PetscTruth perm_identity; 58 59 PetscFunctionBegin; 60 /* check whether perm is the identity mapping */ 61 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 62 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 63 64 if (perm_identity){ /* without permutation */ 65 a->permute = PETSC_FALSE; 66 ai = a->i; aj = a->j; 67 } else { /* non-trivial permutation */ 68 a->permute = PETSC_TRUE; 69 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 70 ai = a->inew; aj = a->jnew; 71 } 72 73 /* initialization */ 74 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 75 umax = (PetscInt)(f*ai[mbs] + 1); umax += mbs + 1; 76 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 77 iu[0] = mbs+1; 78 juidx = mbs + 1; /* index for ju */ 79 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&jl);CHKERRQ(ierr); /* linked list for pivot row */ 80 q = jl + mbs; /* linked list for col index */ 81 for (i=0; i<mbs; i++){ 82 jl[i] = mbs; 83 q[i] = 0; 84 } 85 86 /* for each row k */ 87 for (k=0; k<mbs; k++){ 88 for (i=0; i<mbs; i++) q[i] = 0; /* to be removed! */ 89 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 90 q[k] = mbs; 91 /* initialize nonzero structure of k-th row to row rip[k] of A */ 92 jmin = ai[rip[k]] +1; /* exclude diag[k] */ 93 jmax = ai[rip[k]+1]; 94 for (j=jmin; j<jmax; j++){ 95 vj = rip[aj[j]]; /* col. value */ 96 if(vj > k){ 97 qm = k; 98 do { 99 m = qm; qm = q[m]; 100 } while(qm < vj); 101 if (qm == vj) { 102 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 103 } 104 nzk++; 105 q[m] = vj; 106 q[vj] = qm; 107 } /* if(vj > k) */ 108 } /* for (j=jmin; j<jmax; j++) */ 109 110 /* modify nonzero structure of k-th row by computing fill-in 111 for each row i to be merged in */ 112 prow = k; 113 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 114 115 while (prow < k){ 116 /* merge row prow into k-th row */ 117 jmin = iu[prow] + 1; jmax = iu[prow+1]; 118 qm = k; 119 for (j=jmin; j<jmax; j++){ 120 vj = ju[j]; 121 do { 122 m = qm; qm = q[m]; 123 } while (qm < vj); 124 if (qm != vj){ 125 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 126 } 127 } 128 prow = jl[prow]; /* next pivot row */ 129 } 130 131 /* add k to row list for first nonzero element in k-th row */ 132 if (nzk > 0){ 133 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 134 jl[k] = jl[i]; jl[i] = k; 135 } 136 iu[k+1] = iu[k] + nzk; 137 138 /* allocate more space to ju if needed */ 139 if (iu[k+1] > umax) { 140 /* estimate how much additional space we will need */ 141 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 142 /* just double the memory each time */ 143 maxadd = umax; 144 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 145 umax += maxadd; 146 147 /* allocate a longer ju */ 148 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 149 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 150 ierr = PetscFree(ju);CHKERRQ(ierr); 151 ju = jutmp; 152 reallocs++; /* count how many times we realloc */ 153 } 154 155 /* save nonzero structure of k-th row in ju */ 156 i=k; 157 while (nzk --) { 158 i = q[i]; 159 ju[juidx++] = i; 160 } 161 } 162 163 if (ai[mbs] != 0) { 164 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 165 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 166 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 167 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 168 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 169 } else { 170 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 171 } 172 173 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 174 ierr = PetscFree(jl);CHKERRQ(ierr); 175 176 /* put together the new matrix */ 177 ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr); 178 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 179 ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 180 181 /* PetscLogObjectParent(*B,iperm); */ 182 b = (Mat_SeqSBAIJ*)(*B)->data; 183 ierr = PetscFree(b->imax);CHKERRQ(ierr); 184 b->singlemalloc = PETSC_FALSE; 185 /* the next line frees the default space generated by the Create() */ 186 ierr = PetscFree(b->a);CHKERRQ(ierr); 187 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 188 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 189 b->j = ju; 190 b->i = iu; 191 b->diag = 0; 192 b->ilen = 0; 193 b->imax = 0; 194 b->row = perm; 195 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 196 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 197 b->icol = perm; 198 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 199 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 200 /* In b structure: Free imax, ilen, old a, old j. 201 Allocate idnew, solve_work, new a, new j */ 202 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 203 b->maxnz = b->nz = iu[mbs]; 204 205 (*B)->factor = FACTOR_CHOLESKY; 206 (*B)->info.factor_mallocs = reallocs; 207 (*B)->info.fill_ratio_given = f; 208 if (ai[mbs] != 0) { 209 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 210 } else { 211 (*B)->info.fill_ratio_needed = 0.0; 212 } 213 214 if (perm_identity){ 215 switch (bs) { 216 case 1: 217 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 218 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 219 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 220 break; 221 case 2: 222 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 223 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 224 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 225 break; 226 case 3: 227 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 228 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 229 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 230 break; 231 case 4: 232 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 233 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 234 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 235 break; 236 case 5: 237 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 238 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 239 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 240 break; 241 case 6: 242 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 243 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 244 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 245 break; 246 case 7: 247 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 248 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 249 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 250 break; 251 default: 252 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 253 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 254 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 255 break; 256 } 257 } 258 PetscFunctionReturn(0); 259 } 260 /* 261 Symbolic U^T*D*U factorization for SBAIJ format. 262 */ 263 #undef __FUNCT__ 264 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 265 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B) 266 { 267 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 268 PetscErrorCode ierr; 269 PetscInt *rip,i,mbs = a->mbs,*ai,*aj; 270 PetscInt *jutmp,bs = A->bs,bs2=a->bs2; 271 PetscInt m,reallocs = 0,prow; 272 PetscInt *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 273 PetscInt *il,ili,nextprow; 274 PetscReal f = info->fill; 275 PetscTruth perm_identity; 276 277 PetscFunctionBegin; 278 /* 279 This code originally uses Modified Sparse Row (MSR) storage 280 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise! 281 Then it is rewritten so the factor B takes seqsbaij format. However the associated 282 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity, 283 thus the original code in MSR format is still used for these cases. 284 The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever 285 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 286 */ 287 /* check whether perm is the identity mapping */ 288 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 289 if (bs>1){ 290 a->permute = PETSC_TRUE; 291 ierr = MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,B);CHKERRQ(ierr); 292 PetscFunctionReturn(0); 293 } 294 295 /* At present, the code below works only for perm_identity=PETSC_TRUE */ 296 if (perm_identity){ 297 a->permute = PETSC_FALSE; 298 ai = a->i; aj = a->j; 299 } else { 300 a->permute = PETSC_TRUE; 301 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 302 ai = a->inew; aj = a->jnew; 303 } 304 305 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 306 307 /* initialization */ 308 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 309 umax = (PetscInt)(f*ai[mbs] + 1); 310 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 311 iu[0] = 0; 312 juidx = 0; /* index for ju */ 313 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */ 314 q = jl + mbs; /* linked list for col index of active row */ 315 il = q + mbs; 316 for (i=0; i<mbs; i++){ 317 jl[i] = mbs; 318 q[i] = 0; 319 il[i] = 0; 320 } 321 322 /* for each row k */ 323 for (k=0; k<mbs; k++){ 324 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 325 q[k] = mbs; 326 /* initialize nonzero structure of k-th row to row rip[k] of A */ 327 jmin = ai[rip[k]] +1; /* exclude diag[k] */ 328 jmax = ai[rip[k]+1]; 329 for (j=jmin; j<jmax; j++){ 330 vj = rip[aj[j]]; /* col. value */ 331 if(vj > k){ 332 qm = k; 333 do { 334 m = qm; qm = q[m]; 335 } while(qm < vj); 336 if (qm == vj) { 337 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 338 } 339 nzk++; 340 q[m] = vj; 341 q[vj] = qm; 342 } /* if(vj > k) */ 343 } /* for (j=jmin; j<jmax; j++) */ 344 345 /* modify nonzero structure of k-th row by computing fill-in 346 for each row i to be merged in */ 347 prow = k; 348 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 349 350 while (prow < k){ 351 nextprow = jl[prow]; 352 353 /* merge row prow into k-th row */ 354 ili = il[prow]; 355 jmin = ili + 1; /* points to 2nd nzero entry in U(prow,k:mbs-1) */ 356 jmax = iu[prow+1]; 357 qm = k; 358 for (j=jmin; j<jmax; j++){ 359 vj = ju[j]; 360 do { 361 m = qm; qm = q[m]; 362 } while (qm < vj); 363 if (qm != vj){ /* a fill */ 364 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 365 } 366 } /* end of for (j=jmin; j<jmax; j++) */ 367 if (jmin < jmax){ 368 il[prow] = jmin; 369 j = ju[jmin]; 370 jl[prow] = jl[j]; jl[j] = prow; /* update jl */ 371 } 372 prow = nextprow; 373 } 374 375 /* update il and jl */ 376 if (nzk > 0){ 377 i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 378 jl[k] = jl[i]; jl[i] = k; 379 il[k] = iu[k] + 1; 380 } 381 iu[k+1] = iu[k] + nzk + 1; /* include diag[k] */ 382 383 /* allocate more space to ju if needed */ 384 if (iu[k+1] > umax) { 385 /* estimate how much additional space we will need */ 386 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 387 /* just double the memory each time */ 388 maxadd = umax; 389 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 390 umax += maxadd; 391 392 /* allocate a longer ju */ 393 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 394 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 395 ierr = PetscFree(ju);CHKERRQ(ierr); 396 ju = jutmp; 397 reallocs++; /* count how many times we realloc */ 398 } 399 400 /* save nonzero structure of k-th row in ju */ 401 ju[juidx++] = k; /* diag[k] */ 402 i = k; 403 while (nzk --) { 404 i = q[i]; 405 ju[juidx++] = i; 406 } 407 } 408 409 if (ai[mbs] != 0) { 410 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 411 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 412 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 413 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 414 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 415 } else { 416 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 417 } 418 419 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 420 /* ierr = PetscFree(q);CHKERRQ(ierr); */ 421 ierr = PetscFree(jl);CHKERRQ(ierr); 422 423 /* put together the new matrix */ 424 ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr); 425 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 426 ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 427 428 /* PetscLogObjectParent(*B,iperm); */ 429 b = (Mat_SeqSBAIJ*)(*B)->data; 430 ierr = PetscFree(b->imax);CHKERRQ(ierr); 431 b->singlemalloc = PETSC_FALSE; 432 /* the next line frees the default space generated by the Create() */ 433 ierr = PetscFree(b->a);CHKERRQ(ierr); 434 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 435 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 436 b->j = ju; 437 b->i = iu; 438 b->diag = 0; 439 b->ilen = 0; 440 b->imax = 0; 441 b->row = perm; 442 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 443 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 444 b->icol = perm; 445 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 446 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 447 /* In b structure: Free imax, ilen, old a, old j. 448 Allocate idnew, solve_work, new a, new j */ 449 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 450 b->maxnz = b->nz = iu[mbs]; 451 452 (*B)->factor = FACTOR_CHOLESKY; 453 (*B)->info.factor_mallocs = reallocs; 454 (*B)->info.fill_ratio_given = f; 455 if (ai[mbs] != 0) { 456 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 457 } else { 458 (*B)->info.fill_ratio_needed = 0.0; 459 } 460 461 if (perm_identity){ 462 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 463 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 464 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 465 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special natural ordering factor and solve BS=1\n"); 466 } 467 #ifdef OLD 468 /* check whether perm is the identity mapping */ 469 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 470 if (bs>1 || !perm_identity){ 471 a->permute = PETSC_TRUE; 472 ierr = MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,B);CHKERRQ(ierr); 473 PetscFunctionReturn(0); 474 } 475 476 /* At present, the code below works only for perm_identity=PETSC_TRUE */ 477 a->permute = PETSC_FALSE; 478 479 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 480 ai = a->i; aj = a->j; 481 482 /* initialization */ 483 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 484 umax = (PetscInt)(f*ai[mbs] + 1); 485 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 486 iu[0] = 0; 487 juidx = 0; /* index for ju */ 488 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */ 489 q = jl + mbs; /* linked list for col index of active row */ 490 il = q + mbs; 491 for (i=0; i<mbs; i++){ 492 jl[i] = mbs; 493 q[i] = 0; 494 il[i] = 0; 495 } 496 497 /* for each row k */ 498 for (k=0; k<mbs; k++){ 499 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 500 q[k] = mbs; 501 /* initialize nonzero structure of k-th row to row rip[k] of A */ 502 jmin = ai[rip[k]] +1; /* exclude diag[k] */ 503 jmax = ai[rip[k]+1]; 504 for (j=jmin; j<jmax; j++){ 505 vj = rip[aj[j]]; /* col. value */ 506 if(vj > k){ 507 qm = k; 508 do { 509 m = qm; qm = q[m]; 510 } while(qm < vj); 511 if (qm == vj) { 512 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 513 } 514 nzk++; 515 q[m] = vj; 516 q[vj] = qm; 517 } /* if(vj > k) */ 518 } /* for (j=jmin; j<jmax; j++) */ 519 520 /* modify nonzero structure of k-th row by computing fill-in 521 for each row i to be merged in */ 522 prow = k; 523 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 524 525 while (prow < k){ 526 nextprow = jl[prow]; 527 528 /* merge row prow into k-th row */ 529 ili = il[prow]; 530 jmin = ili + 1; /* points to 2nd nzero entry in U(prow,k:mbs-1) */ 531 jmax = iu[prow+1]; 532 qm = k; 533 for (j=jmin; j<jmax; j++){ 534 vj = ju[j]; 535 do { 536 m = qm; qm = q[m]; 537 } while (qm < vj); 538 if (qm != vj){ /* a fill */ 539 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 540 } 541 } /* end of for (j=jmin; j<jmax; j++) */ 542 if (jmin < jmax){ 543 il[prow] = jmin; 544 j = ju[jmin]; 545 jl[prow] = jl[j]; jl[j] = prow; /* update jl */ 546 } 547 prow = nextprow; 548 } 549 550 /* update il and jl */ 551 if (nzk > 0){ 552 i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 553 jl[k] = jl[i]; jl[i] = k; 554 il[k] = iu[k] + 1; 555 } 556 iu[k+1] = iu[k] + nzk + 1; /* include diag[k] */ 557 558 /* allocate more space to ju if needed */ 559 if (iu[k+1] > umax) { 560 /* estimate how much additional space we will need */ 561 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 562 /* just double the memory each time */ 563 maxadd = umax; 564 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 565 umax += maxadd; 566 567 /* allocate a longer ju */ 568 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 569 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 570 ierr = PetscFree(ju);CHKERRQ(ierr); 571 ju = jutmp; 572 reallocs++; /* count how many times we realloc */ 573 } 574 575 /* save nonzero structure of k-th row in ju */ 576 ju[juidx++] = k; /* diag[k] */ 577 i = k; 578 while (nzk --) { 579 i = q[i]; 580 ju[juidx++] = i; 581 } 582 } 583 584 if (ai[mbs] != 0) { 585 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 586 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 587 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 588 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 589 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 590 } else { 591 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 592 } 593 594 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 595 /* ierr = PetscFree(q);CHKERRQ(ierr); */ 596 ierr = PetscFree(jl);CHKERRQ(ierr); 597 598 /* put together the new matrix */ 599 ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr); 600 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 601 ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 602 603 /* PetscLogObjectParent(*B,iperm); */ 604 b = (Mat_SeqSBAIJ*)(*B)->data; 605 ierr = PetscFree(b->imax);CHKERRQ(ierr); 606 b->singlemalloc = PETSC_FALSE; 607 /* the next line frees the default space generated by the Create() */ 608 ierr = PetscFree(b->a);CHKERRQ(ierr); 609 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 610 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 611 b->j = ju; 612 b->i = iu; 613 b->diag = 0; 614 b->ilen = 0; 615 b->imax = 0; 616 b->row = perm; 617 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 618 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 619 b->icol = perm; 620 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 621 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 622 /* In b structure: Free imax, ilen, old a, old j. 623 Allocate idnew, solve_work, new a, new j */ 624 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 625 b->maxnz = b->nz = iu[mbs]; 626 627 (*B)->factor = FACTOR_CHOLESKY; 628 (*B)->info.factor_mallocs = reallocs; 629 (*B)->info.fill_ratio_given = f; 630 if (ai[mbs] != 0) { 631 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 632 } else { 633 (*B)->info.fill_ratio_needed = 0.0; 634 } 635 636 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 637 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 638 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 639 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 640 #endif /* OLD */ 641 PetscFunctionReturn(0); 642 } 643 #undef __FUNCT__ 644 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 645 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B) 646 { 647 Mat C = *B; 648 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 649 IS perm = b->row; 650 PetscErrorCode ierr; 651 PetscInt *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 652 PetscInt *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 653 PetscInt bs=A->bs,bs2 = a->bs2; 654 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 655 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 656 MatScalar *work; 657 PetscInt *pivots; 658 659 PetscFunctionBegin; 660 /* initialization */ 661 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 662 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 663 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 664 jl = il + mbs; 665 for (i=0; i<mbs; i++) { 666 jl[i] = mbs; il[0] = 0; 667 } 668 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 669 uik = dk + bs2; 670 work = uik + bs2; 671 ierr = PetscMalloc(bs*sizeof(PetscInt),&pivots);CHKERRQ(ierr); 672 673 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 674 675 /* check permutation */ 676 if (!a->permute){ 677 ai = a->i; aj = a->j; aa = a->a; 678 } else { 679 ai = a->inew; aj = a->jnew; 680 ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 681 ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 682 ierr = PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);CHKERRQ(ierr); 683 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr); 684 685 for (i=0; i<mbs; i++){ 686 jmin = ai[i]; jmax = ai[i+1]; 687 for (j=jmin; j<jmax; j++){ 688 while (a2anew[j] != j){ 689 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 690 for (k1=0; k1<bs2; k1++){ 691 dk[k1] = aa[k*bs2+k1]; 692 aa[k*bs2+k1] = aa[j*bs2+k1]; 693 aa[j*bs2+k1] = dk[k1]; 694 } 695 } 696 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 697 if (i > aj[j]){ 698 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 699 ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 700 for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 701 for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 702 for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 703 } 704 } 705 } 706 } 707 ierr = PetscFree(a2anew);CHKERRQ(ierr); 708 } 709 710 /* for each row k */ 711 for (k = 0; k<mbs; k++){ 712 713 /*initialize k-th row with elements nonzero in row perm(k) of A */ 714 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 715 716 ap = aa + jmin*bs2; 717 for (j = jmin; j < jmax; j++){ 718 vj = perm_ptr[aj[j]]; /* block col. index */ 719 rtmp_ptr = rtmp + vj*bs2; 720 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 721 } 722 723 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 724 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 725 i = jl[k]; /* first row to be added to k_th row */ 726 727 while (i < k){ 728 nexti = jl[i]; /* next row to be added to k_th row */ 729 730 /* compute multiplier */ 731 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 732 733 /* uik = -inv(Di)*U_bar(i,k) */ 734 diag = ba + i*bs2; 735 u = ba + ili*bs2; 736 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 737 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 738 739 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 740 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 741 742 /* update -U(i,k) */ 743 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 744 745 /* add multiple of row i to k-th row ... */ 746 jmin = ili + 1; jmax = bi[i+1]; 747 if (jmin < jmax){ 748 for (j=jmin; j<jmax; j++) { 749 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 750 rtmp_ptr = rtmp + bj[j]*bs2; 751 u = ba + j*bs2; 752 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 753 } 754 755 /* ... add i to row list for next nonzero entry */ 756 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 757 j = bj[jmin]; 758 jl[i] = jl[j]; jl[j] = i; /* update jl */ 759 } 760 i = nexti; 761 } 762 763 /* save nonzero entries in k-th row of U ... */ 764 765 /* invert diagonal block */ 766 diag = ba+k*bs2; 767 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 768 ierr = Kernel_A_gets_inverse_A(bs,diag,pivots,work);CHKERRQ(ierr); 769 770 jmin = bi[k]; jmax = bi[k+1]; 771 if (jmin < jmax) { 772 for (j=jmin; j<jmax; j++){ 773 vj = bj[j]; /* block col. index of U */ 774 u = ba + j*bs2; 775 rtmp_ptr = rtmp + vj*bs2; 776 for (k1=0; k1<bs2; k1++){ 777 *u++ = *rtmp_ptr; 778 *rtmp_ptr++ = 0.0; 779 } 780 } 781 782 /* ... add k to row list for first nonzero entry in k-th row */ 783 il[k] = jmin; 784 i = bj[jmin]; 785 jl[k] = jl[i]; jl[i] = k; 786 } 787 } 788 789 ierr = PetscFree(rtmp);CHKERRQ(ierr); 790 ierr = PetscFree(il);CHKERRQ(ierr); 791 ierr = PetscFree(dk);CHKERRQ(ierr); 792 ierr = PetscFree(pivots);CHKERRQ(ierr); 793 if (a->permute){ 794 ierr = PetscFree(aa);CHKERRQ(ierr); 795 } 796 797 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 798 C->factor = FACTOR_CHOLESKY; 799 C->assembled = PETSC_TRUE; 800 C->preallocated = PETSC_TRUE; 801 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 802 PetscFunctionReturn(0); 803 } 804 805 #undef __FUNCT__ 806 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 807 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B) 808 { 809 Mat C = *B; 810 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 811 PetscErrorCode ierr; 812 PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 813 PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 814 PetscInt bs=A->bs,bs2 = a->bs2; 815 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 816 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 817 MatScalar *work; 818 PetscInt *pivots; 819 820 PetscFunctionBegin; 821 /* initialization */ 822 823 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 824 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 825 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 826 jl = il + mbs; 827 for (i=0; i<mbs; i++) { 828 jl[i] = mbs; il[0] = 0; 829 } 830 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 831 uik = dk + bs2; 832 work = uik + bs2; 833 ierr = PetscMalloc(bs*sizeof(PetscInt),&pivots);CHKERRQ(ierr); 834 835 ai = a->i; aj = a->j; aa = a->a; 836 837 /* for each row k */ 838 for (k = 0; k<mbs; k++){ 839 840 /*initialize k-th row with elements nonzero in row k of A */ 841 jmin = ai[k]; jmax = ai[k+1]; 842 ap = aa + jmin*bs2; 843 for (j = jmin; j < jmax; j++){ 844 vj = aj[j]; /* block col. index */ 845 rtmp_ptr = rtmp + vj*bs2; 846 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 847 } 848 849 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 850 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 851 i = jl[k]; /* first row to be added to k_th row */ 852 853 while (i < k){ 854 nexti = jl[i]; /* next row to be added to k_th row */ 855 856 /* compute multiplier */ 857 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 858 859 /* uik = -inv(Di)*U_bar(i,k) */ 860 diag = ba + i*bs2; 861 u = ba + ili*bs2; 862 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 863 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 864 865 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 866 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 867 868 /* update -U(i,k) */ 869 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 870 871 /* add multiple of row i to k-th row ... */ 872 jmin = ili + 1; jmax = bi[i+1]; 873 if (jmin < jmax){ 874 for (j=jmin; j<jmax; j++) { 875 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 876 rtmp_ptr = rtmp + bj[j]*bs2; 877 u = ba + j*bs2; 878 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 879 } 880 881 /* ... add i to row list for next nonzero entry */ 882 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 883 j = bj[jmin]; 884 jl[i] = jl[j]; jl[j] = i; /* update jl */ 885 } 886 i = nexti; 887 } 888 889 /* save nonzero entries in k-th row of U ... */ 890 891 /* invert diagonal block */ 892 diag = ba+k*bs2; 893 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 894 ierr = Kernel_A_gets_inverse_A(bs,diag,pivots,work);CHKERRQ(ierr); 895 896 jmin = bi[k]; jmax = bi[k+1]; 897 if (jmin < jmax) { 898 for (j=jmin; j<jmax; j++){ 899 vj = bj[j]; /* block col. index of U */ 900 u = ba + j*bs2; 901 rtmp_ptr = rtmp + vj*bs2; 902 for (k1=0; k1<bs2; k1++){ 903 *u++ = *rtmp_ptr; 904 *rtmp_ptr++ = 0.0; 905 } 906 } 907 908 /* ... add k to row list for first nonzero entry in k-th row */ 909 il[k] = jmin; 910 i = bj[jmin]; 911 jl[k] = jl[i]; jl[i] = k; 912 } 913 } 914 915 ierr = PetscFree(rtmp);CHKERRQ(ierr); 916 ierr = PetscFree(il);CHKERRQ(ierr); 917 ierr = PetscFree(dk);CHKERRQ(ierr); 918 ierr = PetscFree(pivots);CHKERRQ(ierr); 919 920 C->factor = FACTOR_CHOLESKY; 921 C->assembled = PETSC_TRUE; 922 C->preallocated = PETSC_TRUE; 923 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 924 PetscFunctionReturn(0); 925 } 926 927 /* 928 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 929 Version for blocks 2 by 2. 930 */ 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 933 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B) 934 { 935 Mat C = *B; 936 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 937 IS perm = b->row; 938 PetscErrorCode ierr; 939 PetscInt *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 940 PetscInt *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 941 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 942 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 943 944 PetscFunctionBegin; 945 /* initialization */ 946 /* il and jl record the first nonzero element in each row of the accessing 947 window U(0:k, k:mbs-1). 948 jl: list of rows to be added to uneliminated rows 949 i>= k: jl(i) is the first row to be added to row i 950 i< k: jl(i) is the row following row i in some list of rows 951 jl(i) = mbs indicates the end of a list 952 il(i): points to the first nonzero element in columns k,...,mbs-1 of 953 row i of U */ 954 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 955 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 956 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 957 jl = il + mbs; 958 for (i=0; i<mbs; i++) { 959 jl[i] = mbs; il[0] = 0; 960 } 961 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 962 uik = dk + 4; 963 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 964 965 /* check permutation */ 966 if (!a->permute){ 967 ai = a->i; aj = a->j; aa = a->a; 968 } else { 969 ai = a->inew; aj = a->jnew; 970 ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 971 ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 972 ierr = PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);CHKERRQ(ierr); 973 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr); 974 975 for (i=0; i<mbs; i++){ 976 jmin = ai[i]; jmax = ai[i+1]; 977 for (j=jmin; j<jmax; j++){ 978 while (a2anew[j] != j){ 979 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 980 for (k1=0; k1<4; k1++){ 981 dk[k1] = aa[k*4+k1]; 982 aa[k*4+k1] = aa[j*4+k1]; 983 aa[j*4+k1] = dk[k1]; 984 } 985 } 986 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 987 if (i > aj[j]){ 988 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 989 ap = aa + j*4; /* ptr to the beginning of the block */ 990 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 991 ap[1] = ap[2]; 992 ap[2] = dk[1]; 993 } 994 } 995 } 996 ierr = PetscFree(a2anew);CHKERRQ(ierr); 997 } 998 999 /* for each row k */ 1000 for (k = 0; k<mbs; k++){ 1001 1002 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1003 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 1004 ap = aa + jmin*4; 1005 for (j = jmin; j < jmax; j++){ 1006 vj = perm_ptr[aj[j]]; /* block col. index */ 1007 rtmp_ptr = rtmp + vj*4; 1008 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 1009 } 1010 1011 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1012 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 1013 i = jl[k]; /* first row to be added to k_th row */ 1014 1015 while (i < k){ 1016 nexti = jl[i]; /* next row to be added to k_th row */ 1017 1018 /* compute multiplier */ 1019 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1020 1021 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 1022 diag = ba + i*4; 1023 u = ba + ili*4; 1024 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 1025 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 1026 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 1027 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 1028 1029 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 1030 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 1031 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 1032 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 1033 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 1034 1035 /* update -U(i,k): ba[ili] = uik */ 1036 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 1037 1038 /* add multiple of row i to k-th row ... */ 1039 jmin = ili + 1; jmax = bi[i+1]; 1040 if (jmin < jmax){ 1041 for (j=jmin; j<jmax; j++) { 1042 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 1043 rtmp_ptr = rtmp + bj[j]*4; 1044 u = ba + j*4; 1045 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 1046 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 1047 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 1048 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 1049 } 1050 1051 /* ... add i to row list for next nonzero entry */ 1052 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1053 j = bj[jmin]; 1054 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1055 } 1056 i = nexti; 1057 } 1058 1059 /* save nonzero entries in k-th row of U ... */ 1060 1061 /* invert diagonal block */ 1062 diag = ba+k*4; 1063 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 1064 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 1065 1066 jmin = bi[k]; jmax = bi[k+1]; 1067 if (jmin < jmax) { 1068 for (j=jmin; j<jmax; j++){ 1069 vj = bj[j]; /* block col. index of U */ 1070 u = ba + j*4; 1071 rtmp_ptr = rtmp + vj*4; 1072 for (k1=0; k1<4; k1++){ 1073 *u++ = *rtmp_ptr; 1074 *rtmp_ptr++ = 0.0; 1075 } 1076 } 1077 1078 /* ... add k to row list for first nonzero entry in k-th row */ 1079 il[k] = jmin; 1080 i = bj[jmin]; 1081 jl[k] = jl[i]; jl[i] = k; 1082 } 1083 } 1084 1085 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1086 ierr = PetscFree(il);CHKERRQ(ierr); 1087 ierr = PetscFree(dk);CHKERRQ(ierr); 1088 if (a->permute) { 1089 ierr = PetscFree(aa);CHKERRQ(ierr); 1090 } 1091 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 1092 C->factor = FACTOR_CHOLESKY; 1093 C->assembled = PETSC_TRUE; 1094 C->preallocated = PETSC_TRUE; 1095 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 1096 PetscFunctionReturn(0); 1097 } 1098 1099 /* 1100 Version for when blocks are 2 by 2 Using natural ordering 1101 */ 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 1104 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B) 1105 { 1106 Mat C = *B; 1107 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 1108 PetscErrorCode ierr; 1109 PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 1110 PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 1111 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 1112 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 1113 1114 PetscFunctionBegin; 1115 /* initialization */ 1116 /* il and jl record the first nonzero element in each row of the accessing 1117 window U(0:k, k:mbs-1). 1118 jl: list of rows to be added to uneliminated rows 1119 i>= k: jl(i) is the first row to be added to row i 1120 i< k: jl(i) is the row following row i in some list of rows 1121 jl(i) = mbs indicates the end of a list 1122 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1123 row i of U */ 1124 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1125 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1126 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 1127 jl = il + mbs; 1128 for (i=0; i<mbs; i++) { 1129 jl[i] = mbs; il[0] = 0; 1130 } 1131 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 1132 uik = dk + 4; 1133 1134 ai = a->i; aj = a->j; aa = a->a; 1135 1136 /* for each row k */ 1137 for (k = 0; k<mbs; k++){ 1138 1139 /*initialize k-th row with elements nonzero in row k of A */ 1140 jmin = ai[k]; jmax = ai[k+1]; 1141 ap = aa + jmin*4; 1142 for (j = jmin; j < jmax; j++){ 1143 vj = aj[j]; /* block col. index */ 1144 rtmp_ptr = rtmp + vj*4; 1145 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 1146 } 1147 1148 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1149 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 1150 i = jl[k]; /* first row to be added to k_th row */ 1151 1152 while (i < k){ 1153 nexti = jl[i]; /* next row to be added to k_th row */ 1154 1155 /* compute multiplier */ 1156 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1157 1158 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 1159 diag = ba + i*4; 1160 u = ba + ili*4; 1161 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 1162 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 1163 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 1164 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 1165 1166 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 1167 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 1168 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 1169 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 1170 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 1171 1172 /* update -U(i,k): ba[ili] = uik */ 1173 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 1174 1175 /* add multiple of row i to k-th row ... */ 1176 jmin = ili + 1; jmax = bi[i+1]; 1177 if (jmin < jmax){ 1178 for (j=jmin; j<jmax; j++) { 1179 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 1180 rtmp_ptr = rtmp + bj[j]*4; 1181 u = ba + j*4; 1182 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 1183 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 1184 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 1185 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 1186 } 1187 1188 /* ... add i to row list for next nonzero entry */ 1189 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1190 j = bj[jmin]; 1191 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1192 } 1193 i = nexti; 1194 } 1195 1196 /* save nonzero entries in k-th row of U ... */ 1197 1198 /* invert diagonal block */ 1199 diag = ba+k*4; 1200 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 1201 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 1202 1203 jmin = bi[k]; jmax = bi[k+1]; 1204 if (jmin < jmax) { 1205 for (j=jmin; j<jmax; j++){ 1206 vj = bj[j]; /* block col. index of U */ 1207 u = ba + j*4; 1208 rtmp_ptr = rtmp + vj*4; 1209 for (k1=0; k1<4; k1++){ 1210 *u++ = *rtmp_ptr; 1211 *rtmp_ptr++ = 0.0; 1212 } 1213 } 1214 1215 /* ... add k to row list for first nonzero entry in k-th row */ 1216 il[k] = jmin; 1217 i = bj[jmin]; 1218 jl[k] = jl[i]; jl[i] = k; 1219 } 1220 } 1221 1222 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1223 ierr = PetscFree(il);CHKERRQ(ierr); 1224 ierr = PetscFree(dk);CHKERRQ(ierr); 1225 1226 C->factor = FACTOR_CHOLESKY; 1227 C->assembled = PETSC_TRUE; 1228 C->preallocated = PETSC_TRUE; 1229 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 1230 PetscFunctionReturn(0); 1231 } 1232 1233 /* 1234 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 1235 Version for blocks are 1 by 1. 1236 */ 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1" 1239 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B) 1240 { 1241 Mat C = *B; 1242 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data; 1243 IS ip=b->row; 1244 PetscErrorCode ierr; 1245 PetscInt *rip,i,j,mbs = a->mbs,*bi=b->i,*bj=b->j; 1246 PetscInt *ai,*aj,*a2anew; 1247 PetscInt k,jmin,jmax,*jl,*il,vj,nexti,ili,nz; 1248 MatScalar *rtmp,*ba,*aa,dk,uikdi; 1249 1250 PetscFunctionBegin; 1251 /* printf(" MatCholeskyFactorNumeric_SeqSBAIJ_1 is called\n"); */ 1252 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1253 if (!a->permute){ 1254 ai = a->i; aj = a->j; aa = a->a; 1255 } else { 1256 ai = a->inew; aj = a->jnew; 1257 nz = ai[mbs]; 1258 ierr = PetscMalloc(nz*sizeof(MatScalar),&aa);CHKERRQ(ierr); 1259 a2anew = a->a2anew; 1260 ba = a->a; 1261 for (j=0; j<nz; j++){ 1262 aa[a2anew[j]] = *(ba++); 1263 } 1264 } 1265 1266 /* initialization */ 1267 /* il and jl record the first nonzero element in each row of the accessing 1268 window U(0:k, k:mbs-1). 1269 jl: list of rows to be added to uneliminated rows 1270 i>= k: jl(i) is the first row to be added to row i 1271 i< k: jl(i) is the row following row i in some list of rows 1272 jl(i) = mbs indicates the end of a list 1273 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1274 row i of U */ 1275 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1276 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 1277 jl = il + mbs; 1278 for (i=0; i<mbs; i++) { 1279 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1280 } 1281 ba = b->a; 1282 for (k = 0; k<mbs; k++){ 1283 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1284 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1285 for (j = jmin; j < jmax; j++){ 1286 vj = rip[aj[j]]; 1287 rtmp[vj] = aa[j]; 1288 } 1289 1290 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1291 dk = rtmp[k]; 1292 i = jl[k]; /* first row to be added to k_th row */ 1293 1294 while (i < k){ 1295 nexti = jl[i]; /* next row to be added to k_th row */ 1296 1297 /* compute multiplier, update diag(k) and U(i,k) */ 1298 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1299 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ /* uikdi = - ba[ili]*ba[i]; */ 1300 dk += uikdi*ba[ili]; 1301 ba[ili] = uikdi; /* -U(i,k) */ 1302 1303 /* add multiple of row i to k-th row ... */ 1304 jmin = ili + 1; jmax = bi[i+1]; 1305 if (jmin < jmax){ 1306 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1307 /* update il and jl for row i */ 1308 il[i] = jmin; 1309 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1310 } 1311 i = nexti; 1312 } 1313 1314 /* check for zero pivot and save diagoanl element */ 1315 if (dk == 0.0){ 1316 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 1317 } 1318 1319 /* copy data ino U(k,:) */ 1320 ba[bi[k]] = 1.0/dk; /* ba[k] = 1.0/dk; */ /* U(k,k) */ 1321 jmin = bi[k]+1; jmax = bi[k+1]; 1322 if (jmin < jmax) { 1323 for (j=jmin; j<jmax; j++){ 1324 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 1325 } 1326 /* ... add k to row list for first nonzero entry in k-th row */ 1327 il[k] = jmin; 1328 i = bj[jmin]; 1329 jl[k] = jl[i]; jl[i] = k; 1330 } 1331 } 1332 1333 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1334 ierr = PetscFree(il);CHKERRQ(ierr); 1335 if (a->permute){ 1336 ierr = PetscFree(aa);CHKERRQ(ierr); 1337 } 1338 1339 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1340 C->factor = FACTOR_CHOLESKY; 1341 C->assembled = PETSC_TRUE; 1342 C->preallocated = PETSC_TRUE; 1343 PetscLogFlops(b->mbs); 1344 PetscFunctionReturn(0); 1345 } 1346 1347 /* 1348 Version for when blocks are 1 by 1 Using natural ordering 1349 */ 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 1352 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B) 1353 { 1354 Mat C = *B; 1355 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data; 1356 PetscErrorCode ierr; 1357 PetscInt i,j,mbs = a->mbs; 1358 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 1359 PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0; 1360 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 1361 PetscReal damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount; 1362 PetscTruth damp,chshift; 1363 PetscInt nshift=0; 1364 1365 PetscFunctionBegin; 1366 /* initialization */ 1367 /* il and jl record the first nonzero element in each row of the accessing 1368 window U(0:k, k:mbs-1). 1369 jl: list of rows to be added to uneliminated rows 1370 i>= k: jl(i) is the first row to be added to row i 1371 i< k: jl(i) is the row following row i in some list of rows 1372 jl(i) = mbs indicates the end of a list 1373 il(i): points to the first nonzero element in U(i,k:mbs-1) 1374 */ 1375 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1376 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 1377 jl = il + mbs; 1378 1379 shift_amount = 0; 1380 do { 1381 damp = PETSC_FALSE; 1382 chshift = PETSC_FALSE; 1383 for (i=0; i<mbs; i++) { 1384 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1385 } 1386 1387 for (k = 0; k<mbs; k++){ /* row k */ 1388 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1389 nz = ai[k+1] - ai[k]; 1390 acol = aj + ai[k]; 1391 aval = aa + ai[k]; 1392 bval = ba + bi[k]; 1393 while (nz -- ){ 1394 rtmp[*acol++] = *aval++; 1395 *bval++ = 0.0; /* for in-place factorization */ 1396 } 1397 /* damp the diagonal of the matrix */ 1398 if (ndamp||nshift) rtmp[k] += damping+shift_amount; 1399 1400 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1401 dk = rtmp[k]; 1402 i = jl[k]; /* first row to be added to k_th row */ 1403 1404 while (i < k){ 1405 nexti = jl[i]; /* next row to be added to k_th row */ 1406 1407 /* compute multiplier, update D(k) and U(i,k) */ 1408 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1409 uikdi = - ba[ili]*ba[bi[i]]; 1410 dk += uikdi*ba[ili]; 1411 ba[ili] = uikdi; /* -U(i,k) */ 1412 1413 /* add multiple of row i to k-th row ... */ 1414 jmin = ili + 1; 1415 nz = bi[i+1] - jmin; 1416 if (nz > 0){ 1417 bcol = bj + jmin; 1418 bval = ba + jmin; 1419 while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 1420 /* ... add i to row list for next nonzero entry */ 1421 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1422 j = bj[jmin]; 1423 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1424 } 1425 i = nexti; 1426 } 1427 1428 if (PetscRealPart(dk) < zeropivot && b->factor_shift){ 1429 /* calculate a shift that would make this row diagonally dominant */ 1430 PetscReal rs = PetscAbs(PetscRealPart(dk)); 1431 jmin = bi[k]+1; 1432 nz = bi[k+1] - jmin; 1433 if (nz){ 1434 bcol = bj + jmin; 1435 bval = ba + jmin; 1436 while (nz--){ 1437 rs += PetscAbsScalar(rtmp[*bcol++]); 1438 } 1439 } 1440 /* if this shift is less than the previous, just up the previous 1441 one by a bit */ 1442 shift_amount = PetscMax(rs,1.1*shift_amount); 1443 chshift = PETSC_TRUE; 1444 /* Unlike in the ILU case there is no exit condition on nshift: 1445 we increase the shift until it converges. There is no guarantee that 1446 this algorithm converges faster or slower, or is better or worse 1447 than the ILU algorithm. */ 1448 nshift++; 1449 break; 1450 } 1451 if (PetscRealPart(dk) < zeropivot){ 1452 if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1); 1453 if (damping > 0.0) { 1454 if (ndamp) damping *= 2.0; 1455 damp = PETSC_TRUE; 1456 ndamp++; 1457 break; 1458 } else if (PetscAbsScalar(dk) < zeropivot){ 1459 SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot); 1460 } else { 1461 PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k); 1462 } 1463 } 1464 1465 /* save nonzero entries in k-th row of U ... */ 1466 /* printf("%D, dk: %g, 1/dk: %g\n",k,dk,1/dk); */ 1467 ba[bi[k]] = 1.0/dk; 1468 jmin = bi[k]+1; 1469 nz = bi[k+1] - jmin; 1470 if (nz){ 1471 bcol = bj + jmin; 1472 bval = ba + jmin; 1473 while (nz--){ 1474 *bval++ = rtmp[*bcol]; 1475 rtmp[*bcol++] = 0.0; 1476 } 1477 /* ... add k to row list for first nonzero entry in k-th row */ 1478 il[k] = jmin; 1479 i = bj[jmin]; 1480 jl[k] = jl[i]; jl[i] = k; 1481 } 1482 } /* end of for (k = 0; k<mbs; k++) */ 1483 } while (damp||chshift); 1484 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1485 ierr = PetscFree(il);CHKERRQ(ierr); 1486 1487 C->factor = FACTOR_CHOLESKY; 1488 C->assembled = PETSC_TRUE; 1489 C->preallocated = PETSC_TRUE; 1490 PetscLogFlops(b->mbs); 1491 if (ndamp) { 1492 PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqSBAIJ_1_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping); 1493 } 1494 if (nshift) { 1495 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering diagonal shifted %D shifts\n",nshift); 1496 } 1497 1498 PetscFunctionReturn(0); 1499 } 1500 1501 #undef __FUNCT__ 1502 #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ" 1503 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info) 1504 { 1505 PetscErrorCode ierr; 1506 Mat C; 1507 1508 PetscFunctionBegin; 1509 ierr = MatCholeskyFactorSymbolic(A,perm,info,&C);CHKERRQ(ierr); 1510 ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr); 1511 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1512 PetscFunctionReturn(0); 1513 } 1514 1515 1516