1 /*$Id: sbaijfact.c,v 1.56 2001/01/31 19:06:52 balay Exp bsmith $*/ 2 /* Using Modified Sparse Row (MSR) storage. 3 See page 85, "Iterative Methods ..." by Saad. */ 4 5 /* 6 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 7 */ 8 #include "sbaij.h" 9 #include "src/mat/impls/baij/seq/baij.h" 10 #include "src/vec/vecimpl.h" 11 #include "src/inline/ilu.h" 12 #include "include/petscis.h" 13 14 #undef __FUNC__ 15 #define __FUNC__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 16 int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,PetscReal f,Mat *B) 17 { 18 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 19 int *rip,ierr,i,mbs = a->mbs,*ai,*aj; 20 int *jutmp,bs = a->bs,bs2=a->bs2; 21 int m,nzi,realloc = 0; 22 int *jl,*q,jumin,jmin,jmax,juptr,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 23 PetscTruth perm_identity; 24 25 PetscFunctionBegin; 26 if (!perm) { 27 ierr = ISCreateStride(PETSC_COMM_SELF,mbs,0,1,&perm);CHKERRQ(ierr); 28 } 29 PetscValidHeaderSpecific(perm,IS_COOKIE); 30 31 /* check whether perm is the identity mapping */ 32 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 33 if (!perm_identity) a->permute = PETSC_TRUE; 34 35 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 36 37 if (perm_identity){ /* without permutation */ 38 ai = a->i; aj = a->j; 39 } else { /* non-trivial permutation */ 40 ierr = MatReorderingSeqSBAIJ(A, perm);CHKERRQ(ierr); 41 ai = a->inew; aj = a->jnew; 42 } 43 44 /* initialization */ 45 /* Don't know how many column pointers are needed so estimate. 46 Use Modified Sparse Row storage for u and ju, see Sasd pp.85 */ 47 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 48 umax = (int)(f*ai[mbs] + 1); umax += mbs + 1; 49 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 50 iu[0] = mbs+1; 51 juptr = mbs; 52 ierr = PetscMalloc(mbs*sizeof(int),&jl);CHKERRQ(ierr); 53 ierr = PetscMalloc(mbs*sizeof(int),&q);CHKERRQ(ierr); 54 for (i=0; i<mbs; i++){ 55 jl[i] = mbs; q[i] = 0; 56 } 57 58 /* for each row k */ 59 for (k=0; k<mbs; k++){ 60 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 61 q[k] = mbs; 62 /* initialize nonzero structure of k-th row to row rip[k] of A */ 63 jmin = ai[rip[k]]; 64 jmax = ai[rip[k]+1]; 65 for (j=jmin; j<jmax; j++){ 66 vj = rip[aj[j]]; /* col. value */ 67 if(vj > k){ 68 qm = k; 69 do { 70 m = qm; qm = q[m]; 71 } while(qm < vj); 72 if (qm == vj) { 73 printf(" error: duplicate entry in A\n"); break; 74 } 75 nzk++; 76 q[m] = vj; 77 q[vj] = qm; 78 } /* if(vj > k) */ 79 } /* for (j=jmin; j<jmax; j++) */ 80 81 /* modify nonzero structure of k-th row by computing fill-in 82 for each row i to be merged in */ 83 i = k; 84 i = jl[i]; /* next pivot row (== mbs for symbolic factorization) */ 85 /* printf(" next pivot row i=%d\n",i); */ 86 while (i < mbs){ 87 /* merge row i into k-th row */ 88 nzi = iu[i+1] - (iu[i]+1); 89 jmin = iu[i] + 1; jmax = iu[i] + nzi; 90 qm = k; 91 for (j=jmin; j<jmax+1; j++){ 92 vj = ju[j]; 93 do { 94 m = qm; qm = q[m]; 95 } while (qm < vj); 96 if (qm != vj){ 97 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 98 } 99 } 100 i = jl[i]; /* next pivot row */ 101 } 102 103 /* add k to row list for first nonzero element in k-th row */ 104 if (nzk > 0){ 105 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 106 jl[k] = jl[i]; jl[i] = k; 107 } 108 iu[k+1] = iu[k] + nzk; /* printf(" iu[%d]=%d, umax=%d\n", k+1, iu[k+1],umax);*/ 109 110 /* allocate more space to ju if needed */ 111 if (iu[k+1] > umax) { printf("allocate more space, iu[%d]=%d > umax=%d\n",k+1, iu[k+1],umax); 112 /* estimate how much additional space we will need */ 113 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 114 /* just double the memory each time */ 115 maxadd = umax; 116 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 117 umax += maxadd; 118 119 /* allocate a longer ju */ 120 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 121 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 122 ierr = PetscFree(ju);CHKERRQ(ierr); 123 ju = jutmp; 124 realloc++; /* count how many times we realloc */ 125 } 126 127 /* save nonzero structure of k-th row in ju */ 128 i=k; 129 jumin = juptr + 1; juptr += nzk; 130 for (j=jumin; j<juptr+1; j++){ 131 i=q[i]; 132 ju[j]=i; 133 } 134 } 135 136 if (ai[mbs] != 0) { 137 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 138 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 139 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_lu_fill %g or use \n",af); 140 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCLUSetFill(pc,%g);\n",af); 141 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 142 } else { 143 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 144 } 145 146 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 147 ierr = PetscFree(q);CHKERRQ(ierr); 148 ierr = PetscFree(jl);CHKERRQ(ierr); 149 150 /* put together the new matrix */ 151 ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr); 152 /* PetscLogObjectParent(*B,iperm); */ 153 b = (Mat_SeqSBAIJ*)(*B)->data; 154 ierr = PetscFree(b->imax);CHKERRQ(ierr); 155 b->singlemalloc = PETSC_FALSE; 156 /* the next line frees the default space generated by the Create() */ 157 ierr = PetscFree(b->a);CHKERRQ(ierr); 158 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 159 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 160 b->j = ju; 161 b->i = iu; 162 b->diag = 0; 163 b->ilen = 0; 164 b->imax = 0; 165 b->row = perm; 166 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 167 b->icol = perm; 168 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 169 ierr = PetscMalloc((bs*mbs+bs)*sizeof(Scalar),&b->solve_work);CHKERRQ(ierr); 170 /* In b structure: Free imax, ilen, old a, old j. 171 Allocate idnew, solve_work, new a, new j */ 172 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 173 b->s_maxnz = b->s_nz = iu[mbs]; 174 175 (*B)->factor = FACTOR_CHOLESKY; 176 (*B)->info.factor_mallocs = realloc; 177 (*B)->info.fill_ratio_given = f; 178 if (ai[mbs] != 0) { 179 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 180 } else { 181 (*B)->info.fill_ratio_needed = 0.0; 182 } 183 184 if (perm_identity){ 185 switch (bs) { 186 case 1: 187 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 188 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 189 PetscLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 190 break; 191 case 2: 192 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 193 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 194 PetscLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 195 break; 196 case 3: 197 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 198 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 199 PetscLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 200 break; 201 case 4: 202 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 203 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 204 PetscLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 205 break; 206 case 5: 207 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 208 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 209 PetscLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 210 break; 211 case 6: 212 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 213 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 214 PetscLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 215 break; 216 case 7: 217 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 218 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 219 PetscLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 220 break; 221 default: 222 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 223 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 224 PetscLogInfo(A,"MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 225 break; 226 } 227 } 228 229 PetscFunctionReturn(0); 230 } 231 232 233 #undef __FUNC__ 234 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 235 int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B) 236 { 237 Mat C = *B; 238 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 239 IS perm = b->row; 240 int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 241 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 242 int bs=a->bs,bs2 = a->bs2; 243 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 244 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 245 MatScalar *work; 246 int *pivots; 247 248 PetscFunctionBegin; 249 250 /* initialization */ 251 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 252 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 253 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 254 jl = il + mbs; 255 for (i=0; i<mbs; i++) { 256 jl[i] = mbs; il[0] = 0; 257 } 258 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 259 uik = dk + bs2; 260 work = uik + bs2; 261 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 262 263 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 264 265 /* check permutation */ 266 if (!a->permute){ 267 ai = a->i; aj = a->j; aa = a->a; 268 } else { 269 ai = a->inew; aj = a->jnew; 270 ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 271 ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 272 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 273 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 274 275 for (i=0; i<mbs; i++){ 276 jmin = ai[i]; jmax = ai[i+1]; 277 for (j=jmin; j<jmax; j++){ 278 while (a2anew[j] != j){ 279 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 280 for (k1=0; k1<bs2; k1++){ 281 dk[k1] = aa[k*bs2+k1]; 282 aa[k*bs2+k1] = aa[j*bs2+k1]; 283 aa[j*bs2+k1] = dk[k1]; 284 } 285 } 286 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 287 if (i > aj[j]){ 288 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 289 ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 290 for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 291 for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 292 for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 293 } 294 } 295 } 296 } 297 ierr = PetscFree(a2anew);CHKERRQ(ierr); 298 } 299 300 /* for each row k */ 301 for (k = 0; k<mbs; k++){ 302 303 /*initialize k-th row with elements nonzero in row perm(k) of A */ 304 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 305 if (jmin < jmax) { 306 ap = aa + jmin*bs2; 307 for (j = jmin; j < jmax; j++){ 308 vj = perm_ptr[aj[j]]; /* block col. index */ 309 rtmp_ptr = rtmp + vj*bs2; 310 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 311 } 312 } 313 314 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 315 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 316 i = jl[k]; /* first row to be added to k_th row */ 317 318 while (i < mbs){ 319 nexti = jl[i]; /* next row to be added to k_th row */ 320 321 /* compute multiplier */ 322 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 323 324 /* uik = -inv(Di)*U_bar(i,k) */ 325 diag = ba + i*bs2; 326 u = ba + ili*bs2; 327 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 328 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 329 330 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 331 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 332 333 /* update -U(i,k) */ 334 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 335 336 /* add multiple of row i to k-th row ... */ 337 jmin = ili + 1; jmax = bi[i+1]; 338 if (jmin < jmax){ 339 for (j=jmin; j<jmax; j++) { 340 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 341 rtmp_ptr = rtmp + bj[j]*bs2; 342 u = ba + j*bs2; 343 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 344 } 345 346 /* ... add i to row list for next nonzero entry */ 347 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 348 j = bj[jmin]; 349 jl[i] = jl[j]; jl[j] = i; /* update jl */ 350 } 351 i = nexti; 352 } 353 354 /* save nonzero entries in k-th row of U ... */ 355 356 /* invert diagonal block */ 357 diag = ba+k*bs2; 358 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 359 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 360 361 jmin = bi[k]; jmax = bi[k+1]; 362 if (jmin < jmax) { 363 for (j=jmin; j<jmax; j++){ 364 vj = bj[j]; /* block col. index of U */ 365 u = ba + j*bs2; 366 rtmp_ptr = rtmp + vj*bs2; 367 for (k1=0; k1<bs2; k1++){ 368 *u++ = *rtmp_ptr; 369 *rtmp_ptr++ = 0.0; 370 } 371 } 372 373 /* ... add k to row list for first nonzero entry in k-th row */ 374 il[k] = jmin; 375 i = bj[jmin]; 376 jl[k] = jl[i]; jl[i] = k; 377 } 378 } 379 380 ierr = PetscFree(rtmp);CHKERRQ(ierr); 381 ierr = PetscFree(il);CHKERRQ(ierr); 382 ierr = PetscFree(dk);CHKERRQ(ierr); 383 ierr = PetscFree(pivots);CHKERRQ(ierr); 384 if (a->permute){ 385 ierr = PetscFree(aa);CHKERRQ(ierr); 386 } 387 388 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 389 C->factor = FACTOR_CHOLESKY; 390 C->assembled = PETSC_TRUE; 391 C->preallocated = PETSC_TRUE; 392 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 393 PetscFunctionReturn(0); 394 } 395 396 #undef __FUNC__ 397 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 398 int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B) 399 { 400 Mat C = *B; 401 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 402 int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 403 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 404 int bs=a->bs,bs2 = a->bs2; 405 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 406 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 407 MatScalar *work; 408 int *pivots; 409 410 PetscFunctionBegin; 411 412 /* initialization */ 413 414 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 415 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 416 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 417 jl = il + mbs; 418 for (i=0; i<mbs; i++) { 419 jl[i] = mbs; il[0] = 0; 420 } 421 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 422 uik = dk + bs2; 423 work = uik + bs2; 424 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 425 426 ai = a->i; aj = a->j; aa = a->a; 427 428 /* for each row k */ 429 for (k = 0; k<mbs; k++){ 430 431 /*initialize k-th row with elements nonzero in row k of A */ 432 jmin = ai[k]; jmax = ai[k+1]; 433 if (jmin < jmax) { 434 ap = aa + jmin*bs2; 435 for (j = jmin; j < jmax; j++){ 436 vj = aj[j]; /* block col. index */ 437 rtmp_ptr = rtmp + vj*bs2; 438 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 439 } 440 } 441 442 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 443 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 444 i = jl[k]; /* first row to be added to k_th row */ 445 446 while (i < mbs){ 447 nexti = jl[i]; /* next row to be added to k_th row */ 448 449 /* compute multiplier */ 450 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 451 452 /* uik = -inv(Di)*U_bar(i,k) */ 453 diag = ba + i*bs2; 454 u = ba + ili*bs2; 455 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 456 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 457 458 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 459 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 460 461 /* update -U(i,k) */ 462 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 463 464 /* add multiple of row i to k-th row ... */ 465 jmin = ili + 1; jmax = bi[i+1]; 466 if (jmin < jmax){ 467 for (j=jmin; j<jmax; j++) { 468 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 469 rtmp_ptr = rtmp + bj[j]*bs2; 470 u = ba + j*bs2; 471 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 472 } 473 474 /* ... add i to row list for next nonzero entry */ 475 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 476 j = bj[jmin]; 477 jl[i] = jl[j]; jl[j] = i; /* update jl */ 478 } 479 i = nexti; 480 } 481 482 /* save nonzero entries in k-th row of U ... */ 483 484 /* invert diagonal block */ 485 diag = ba+k*bs2; 486 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 487 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 488 489 jmin = bi[k]; jmax = bi[k+1]; 490 if (jmin < jmax) { 491 for (j=jmin; j<jmax; j++){ 492 vj = bj[j]; /* block col. index of U */ 493 u = ba + j*bs2; 494 rtmp_ptr = rtmp + vj*bs2; 495 for (k1=0; k1<bs2; k1++){ 496 *u++ = *rtmp_ptr; 497 *rtmp_ptr++ = 0.0; 498 } 499 } 500 501 /* ... add k to row list for first nonzero entry in k-th row */ 502 il[k] = jmin; 503 i = bj[jmin]; 504 jl[k] = jl[i]; jl[i] = k; 505 } 506 } 507 508 ierr = PetscFree(rtmp);CHKERRQ(ierr); 509 ierr = PetscFree(il);CHKERRQ(ierr); 510 ierr = PetscFree(dk);CHKERRQ(ierr); 511 ierr = PetscFree(pivots);CHKERRQ(ierr); 512 513 C->factor = FACTOR_CHOLESKY; 514 C->assembled = PETSC_TRUE; 515 C->preallocated = PETSC_TRUE; 516 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 517 PetscFunctionReturn(0); 518 } 519 520 /* 521 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 522 Version for blocks 2 by 2. 523 */ 524 #undef __FUNC__ 525 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 526 int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B) 527 { 528 Mat C = *B; 529 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 530 IS perm = b->row; 531 int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 532 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 533 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 534 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 535 536 PetscFunctionBegin; 537 538 /* initialization */ 539 /* il and jl record the first nonzero element in each row of the accessing 540 window U(0:k, k:mbs-1). 541 jl: list of rows to be added to uneliminated rows 542 i>= k: jl(i) is the first row to be added to row i 543 i< k: jl(i) is the row following row i in some list of rows 544 jl(i) = mbs indicates the end of a list 545 il(i): points to the first nonzero element in columns k,...,mbs-1 of 546 row i of U */ 547 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 548 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 549 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 550 jl = il + mbs; 551 for (i=0; i<mbs; i++) { 552 jl[i] = mbs; il[0] = 0; 553 } 554 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 555 uik = dk + 4; 556 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 557 558 /* check permutation */ 559 if (!a->permute){ 560 ai = a->i; aj = a->j; aa = a->a; 561 } else { 562 ai = a->inew; aj = a->jnew; 563 ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 564 ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 565 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 566 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 567 568 for (i=0; i<mbs; i++){ 569 jmin = ai[i]; jmax = ai[i+1]; 570 for (j=jmin; j<jmax; j++){ 571 while (a2anew[j] != j){ 572 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 573 for (k1=0; k1<4; k1++){ 574 dk[k1] = aa[k*4+k1]; 575 aa[k*4+k1] = aa[j*4+k1]; 576 aa[j*4+k1] = dk[k1]; 577 } 578 } 579 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 580 if (i > aj[j]){ 581 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 582 ap = aa + j*4; /* ptr to the beginning of the block */ 583 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 584 ap[1] = ap[2]; 585 ap[2] = dk[1]; 586 } 587 } 588 } 589 ierr = PetscFree(a2anew);CHKERRQ(ierr); 590 } 591 592 /* for each row k */ 593 for (k = 0; k<mbs; k++){ 594 595 /*initialize k-th row with elements nonzero in row perm(k) of A */ 596 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 597 if (jmin < jmax) { 598 ap = aa + jmin*4; 599 for (j = jmin; j < jmax; j++){ 600 vj = perm_ptr[aj[j]]; /* block col. index */ 601 rtmp_ptr = rtmp + vj*4; 602 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 603 } 604 } 605 606 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 607 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 608 i = jl[k]; /* first row to be added to k_th row */ 609 610 while (i < mbs){ 611 nexti = jl[i]; /* next row to be added to k_th row */ 612 613 /* compute multiplier */ 614 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 615 616 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 617 diag = ba + i*4; 618 u = ba + ili*4; 619 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 620 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 621 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 622 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 623 624 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 625 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 626 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 627 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 628 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 629 630 /* update -U(i,k): ba[ili] = uik */ 631 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 632 633 /* add multiple of row i to k-th row ... */ 634 jmin = ili + 1; jmax = bi[i+1]; 635 if (jmin < jmax){ 636 for (j=jmin; j<jmax; j++) { 637 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 638 rtmp_ptr = rtmp + bj[j]*4; 639 u = ba + j*4; 640 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 641 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 642 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 643 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 644 } 645 646 /* ... add i to row list for next nonzero entry */ 647 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 648 j = bj[jmin]; 649 jl[i] = jl[j]; jl[j] = i; /* update jl */ 650 } 651 i = nexti; 652 } 653 654 /* save nonzero entries in k-th row of U ... */ 655 656 /* invert diagonal block */ 657 diag = ba+k*4; 658 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 659 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 660 661 jmin = bi[k]; jmax = bi[k+1]; 662 if (jmin < jmax) { 663 for (j=jmin; j<jmax; j++){ 664 vj = bj[j]; /* block col. index of U */ 665 u = ba + j*4; 666 rtmp_ptr = rtmp + vj*4; 667 for (k1=0; k1<4; k1++){ 668 *u++ = *rtmp_ptr; 669 *rtmp_ptr++ = 0.0; 670 } 671 } 672 673 /* ... add k to row list for first nonzero entry in k-th row */ 674 il[k] = jmin; 675 i = bj[jmin]; 676 jl[k] = jl[i]; jl[i] = k; 677 } 678 } 679 680 ierr = PetscFree(rtmp);CHKERRQ(ierr); 681 ierr = PetscFree(il);CHKERRQ(ierr); 682 ierr = PetscFree(dk);CHKERRQ(ierr); 683 if (a->permute) { 684 ierr = PetscFree(aa);CHKERRQ(ierr); 685 } 686 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 687 C->factor = FACTOR_CHOLESKY; 688 C->assembled = PETSC_TRUE; 689 C->preallocated = PETSC_TRUE; 690 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 691 PetscFunctionReturn(0); 692 } 693 694 /* 695 Version for when blocks are 2 by 2 Using natural ordering 696 */ 697 #undef __FUNC__ 698 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 699 int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B) 700 { 701 Mat C = *B; 702 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 703 int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 704 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 705 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 706 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 707 708 PetscFunctionBegin; 709 710 /* initialization */ 711 /* il and jl record the first nonzero element in each row of the accessing 712 window U(0:k, k:mbs-1). 713 jl: list of rows to be added to uneliminated rows 714 i>= k: jl(i) is the first row to be added to row i 715 i< k: jl(i) is the row following row i in some list of rows 716 jl(i) = mbs indicates the end of a list 717 il(i): points to the first nonzero element in columns k,...,mbs-1 of 718 row i of U */ 719 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 720 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 721 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 722 jl = il + mbs; 723 for (i=0; i<mbs; i++) { 724 jl[i] = mbs; il[0] = 0; 725 } 726 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 727 uik = dk + 4; 728 729 ai = a->i; aj = a->j; aa = a->a; 730 731 /* for each row k */ 732 for (k = 0; k<mbs; k++){ 733 734 /*initialize k-th row with elements nonzero in row k of A */ 735 jmin = ai[k]; jmax = ai[k+1]; 736 if (jmin < jmax) { 737 ap = aa + jmin*4; 738 for (j = jmin; j < jmax; j++){ 739 vj = aj[j]; /* block col. index */ 740 rtmp_ptr = rtmp + vj*4; 741 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 742 } 743 } 744 745 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 746 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 747 i = jl[k]; /* first row to be added to k_th row */ 748 749 while (i < mbs){ 750 nexti = jl[i]; /* next row to be added to k_th row */ 751 752 /* compute multiplier */ 753 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 754 755 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 756 diag = ba + i*4; 757 u = ba + ili*4; 758 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 759 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 760 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 761 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 762 763 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 764 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 765 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 766 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 767 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 768 769 /* update -U(i,k): ba[ili] = uik */ 770 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 771 772 /* add multiple of row i to k-th row ... */ 773 jmin = ili + 1; jmax = bi[i+1]; 774 if (jmin < jmax){ 775 for (j=jmin; j<jmax; j++) { 776 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 777 rtmp_ptr = rtmp + bj[j]*4; 778 u = ba + j*4; 779 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 780 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 781 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 782 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 783 } 784 785 /* ... add i to row list for next nonzero entry */ 786 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 787 j = bj[jmin]; 788 jl[i] = jl[j]; jl[j] = i; /* update jl */ 789 } 790 i = nexti; 791 } 792 793 /* save nonzero entries in k-th row of U ... */ 794 795 /* invert diagonal block */ 796 diag = ba+k*4; 797 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 798 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 799 800 jmin = bi[k]; jmax = bi[k+1]; 801 if (jmin < jmax) { 802 for (j=jmin; j<jmax; j++){ 803 vj = bj[j]; /* block col. index of U */ 804 u = ba + j*4; 805 rtmp_ptr = rtmp + vj*4; 806 for (k1=0; k1<4; k1++){ 807 *u++ = *rtmp_ptr; 808 *rtmp_ptr++ = 0.0; 809 } 810 } 811 812 /* ... add k to row list for first nonzero entry in k-th row */ 813 il[k] = jmin; 814 i = bj[jmin]; 815 jl[k] = jl[i]; jl[i] = k; 816 } 817 } 818 819 ierr = PetscFree(rtmp);CHKERRQ(ierr); 820 ierr = PetscFree(il);CHKERRQ(ierr); 821 ierr = PetscFree(dk);CHKERRQ(ierr); 822 823 C->factor = FACTOR_CHOLESKY; 824 C->assembled = PETSC_TRUE; 825 C->preallocated = PETSC_TRUE; 826 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 827 PetscFunctionReturn(0); 828 } 829 830 /* 831 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 832 Version for blocks are 1 by 1. 833 */ 834 #undef __FUNC__ 835 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_1" 836 int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B) 837 { 838 Mat C = *B; 839 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 840 IS ip = b->row; 841 int *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 842 int *ai,*aj,*r; 843 int k,jmin,jmax,*jl,*il,vj,nexti,ili; 844 MatScalar *rtmp; 845 MatScalar *ba = b->a,*aa,ak; 846 MatScalar dk,uikdi; 847 848 PetscFunctionBegin; 849 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 850 if (!a->permute){ 851 ai = a->i; aj = a->j; aa = a->a; 852 } else { 853 ai = a->inew; aj = a->jnew; 854 ierr = PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 855 ierr = PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 856 ierr = PetscMalloc(ai[mbs]*sizeof(int),&r);CHKERRQ(ierr); 857 ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 858 859 jmin = ai[0]; jmax = ai[mbs]; 860 for (j=jmin; j<jmax; j++){ 861 while (r[j] != j){ 862 k = r[j]; r[j] = r[k]; r[k] = k; 863 ak = aa[k]; aa[k] = aa[j]; aa[j] = ak; 864 } 865 } 866 ierr = PetscFree(r);CHKERRQ(ierr); 867 } 868 869 /* initialization */ 870 /* il and jl record the first nonzero element in each row of the accessing 871 window U(0:k, k:mbs-1). 872 jl: list of rows to be added to uneliminated rows 873 i>= k: jl(i) is the first row to be added to row i 874 i< k: jl(i) is the row following row i in some list of rows 875 jl(i) = mbs indicates the end of a list 876 il(i): points to the first nonzero element in columns k,...,mbs-1 of 877 row i of U */ 878 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 879 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 880 jl = il + mbs; 881 for (i=0; i<mbs; i++) { 882 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 883 } 884 885 /* for each row k */ 886 for (k = 0; k<mbs; k++){ 887 888 /*initialize k-th row with elements nonzero in row perm(k) of A */ 889 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 890 if (jmin < jmax) { 891 for (j = jmin; j < jmax; j++){ 892 vj = rip[aj[j]]; 893 rtmp[vj] = aa[j]; 894 } 895 } 896 897 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 898 dk = rtmp[k]; 899 i = jl[k]; /* first row to be added to k_th row */ 900 901 while (i < mbs){ 902 nexti = jl[i]; /* next row to be added to k_th row */ 903 904 /* compute multiplier, update D(k) and U(i,k) */ 905 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 906 uikdi = - ba[ili]*ba[i]; 907 dk += uikdi*ba[ili]; 908 ba[ili] = uikdi; /* -U(i,k) */ 909 910 /* add multiple of row i to k-th row ... */ 911 jmin = ili + 1; jmax = bi[i+1]; 912 if (jmin < jmax){ 913 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 914 /* ... add i to row list for next nonzero entry */ 915 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 916 j = bj[jmin]; 917 jl[i] = jl[j]; jl[j] = i; /* update jl */ 918 } 919 i = nexti; 920 } 921 922 /* check for zero pivot and save diagoanl element */ 923 if (dk == 0.0){ 924 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 925 /* 926 }else if (PetscRealPart(dk) < 0){ 927 ierr = PetscPrintf(PETSC_COMM_SELF,"Negative pivot: d[%d] = %g\n",k,dk); 928 */ 929 } 930 931 /* save nonzero entries in k-th row of U ... */ 932 ba[k] = 1.0/dk; 933 jmin = bi[k]; jmax = bi[k+1]; 934 if (jmin < jmax) { 935 for (j=jmin; j<jmax; j++){ 936 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 937 } 938 /* ... add k to row list for first nonzero entry in k-th row */ 939 il[k] = jmin; 940 i = bj[jmin]; 941 jl[k] = jl[i]; jl[i] = k; 942 } 943 } 944 945 ierr = PetscFree(rtmp);CHKERRQ(ierr); 946 ierr = PetscFree(il);CHKERRQ(ierr); 947 if (a->permute){ 948 ierr = PetscFree(aa);CHKERRQ(ierr); 949 } 950 951 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 952 C->factor = FACTOR_CHOLESKY; 953 C->assembled = PETSC_TRUE; 954 C->preallocated = PETSC_TRUE; 955 PetscLogFlops(b->mbs); 956 PetscFunctionReturn(0); 957 } 958 959 /* 960 Version for when blocks are 1 by 1 Using natural ordering 961 */ 962 #undef __FUNC__ 963 #define __FUNC__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 964 int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B) 965 { 966 Mat C = *B; 967 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 968 int ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 969 int *ai,*aj; 970 int k,jmin,jmax,*jl,*il,vj,nexti,ili; 971 MatScalar *rtmp,*ba = b->a,*aa,dk,uikdi; 972 973 PetscFunctionBegin; 974 975 /* initialization */ 976 /* il and jl record the first nonzero element in each row of the accessing 977 window U(0:k, k:mbs-1). 978 jl: list of rows to be added to uneliminated rows 979 i>= k: jl(i) is the first row to be added to row i 980 i< k: jl(i) is the row following row i in some list of rows 981 jl(i) = mbs indicates the end of a list 982 il(i): points to the first nonzero element in columns k,...,mbs-1 of 983 row i of U */ 984 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 985 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 986 jl = il + mbs; 987 for (i=0; i<mbs; i++) { 988 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 989 } 990 991 ai = a->i; aj = a->j; aa = a->a; 992 993 /* for each row k */ 994 for (k = 0; k<mbs; k++){ 995 996 /*initialize k-th row with elements nonzero in row perm(k) of A */ 997 jmin = ai[k]; jmax = ai[k+1]; 998 if (jmin < jmax) { 999 for (j = jmin; j < jmax; j++){ 1000 vj = aj[j]; 1001 rtmp[vj] = aa[j]; 1002 } 1003 } 1004 1005 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1006 dk = rtmp[k]; 1007 i = jl[k]; /* first row to be added to k_th row */ 1008 1009 while (i < mbs){ 1010 nexti = jl[i]; /* next row to be added to k_th row */ 1011 1012 /* compute multiplier, update D(k) and U(i,k) */ 1013 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1014 uikdi = - ba[ili]*ba[i]; 1015 dk += uikdi*ba[ili]; 1016 ba[ili] = uikdi; /* -U(i,k) */ 1017 1018 /* add multiple of row i to k-th row ... */ 1019 jmin = ili + 1; jmax = bi[i+1]; 1020 if (jmin < jmax){ 1021 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1022 /* ... add i to row list for next nonzero entry */ 1023 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1024 j = bj[jmin]; 1025 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1026 } 1027 i = nexti; 1028 } 1029 1030 /* check for zero pivot and save diagoanl element */ 1031 if (dk == 0.0){ 1032 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 1033 /* 1034 }else if (PetscRealPart(dk) < 0){ 1035 ierr = PetscPrintf(PETSC_COMM_SELF,"Negative pivot: d[%d] = %g\n",k,dk); 1036 */ 1037 } 1038 1039 /* save nonzero entries in k-th row of U ... */ 1040 ba[k] = 1.0/dk; 1041 jmin = bi[k]; jmax = bi[k+1]; 1042 if (jmin < jmax) { 1043 for (j=jmin; j<jmax; j++){ 1044 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 1045 } 1046 /* ... add k to row list for first nonzero entry in k-th row */ 1047 il[k] = jmin; 1048 i = bj[jmin]; 1049 jl[k] = jl[i]; jl[i] = k; 1050 } 1051 } 1052 1053 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1054 ierr = PetscFree(il);CHKERRQ(ierr); 1055 1056 C->factor = FACTOR_CHOLESKY; 1057 C->assembled = PETSC_TRUE; 1058 C->preallocated = PETSC_TRUE; 1059 PetscLogFlops(b->mbs); 1060 PetscFunctionReturn(0); 1061 } 1062 1063 #undef __FUNC__ 1064 #define __FUNC__ "MatCholeskyFactor_SeqSBAIJ" 1065 int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,PetscReal f) 1066 { 1067 int ierr; 1068 Mat C; 1069 1070 PetscFunctionBegin; 1071 ierr = MatCholeskyFactorSymbolic(A,perm,f,&C);CHKERRQ(ierr); 1072 ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr); 1073 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 1078