1 /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/ 2 /* Using Modified Sparse Row (MSR) storage. 3 See page 85, "Iterative Methods ..." by Saad. */ 4 5 /* 6 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 7 */ 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 #include "src/vec/vecimpl.h" 11 #include "src/inline/ilu.h" 12 #include "include/petscis.h" 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 16 int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,PetscReal f,Mat *B) 17 { 18 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 19 int *rip,ierr,i,mbs = a->mbs,*ai,*aj; 20 int *jutmp,bs = a->bs,bs2=a->bs2; 21 int m,nzi,realloc = 0; 22 int *jl,*q,jumin,jmin,jmax,juptr,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 23 PetscTruth perm_identity; 24 25 PetscFunctionBegin; 26 27 /* check whether perm is the identity mapping */ 28 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 29 if (!perm_identity) a->permute = PETSC_TRUE; 30 31 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 32 33 if (perm_identity){ /* without permutation */ 34 ai = a->i; aj = a->j; 35 } else { /* non-trivial permutation */ 36 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 37 ai = a->inew; aj = a->jnew; 38 } 39 40 /* initialization */ 41 /* Don't know how many column pointers are needed so estimate. 42 Use Modified Sparse Row storage for u and ju, see Sasd pp.85 */ 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 44 umax = (int)(f*ai[mbs] + 1); umax += mbs + 1; 45 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 46 iu[0] = mbs+1; 47 juptr = mbs; 48 ierr = PetscMalloc(mbs*sizeof(int),&jl);CHKERRQ(ierr); 49 ierr = PetscMalloc(mbs*sizeof(int),&q);CHKERRQ(ierr); 50 for (i=0; i<mbs; i++){ 51 jl[i] = mbs; q[i] = 0; 52 } 53 54 /* for each row k */ 55 for (k=0; k<mbs; k++){ 56 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 57 q[k] = mbs; 58 /* initialize nonzero structure of k-th row to row rip[k] of A */ 59 jmin = ai[rip[k]]; 60 jmax = ai[rip[k]+1]; 61 for (j=jmin; j<jmax; j++){ 62 vj = rip[aj[j]]; /* col. value */ 63 if(vj > k){ 64 qm = k; 65 do { 66 m = qm; qm = q[m]; 67 } while(qm < vj); 68 if (qm == vj) { 69 printf(" error: duplicate entry in A\n"); break; 70 } 71 nzk++; 72 q[m] = vj; 73 q[vj] = qm; 74 } /* if(vj > k) */ 75 } /* for (j=jmin; j<jmax; j++) */ 76 77 /* modify nonzero structure of k-th row by computing fill-in 78 for each row i to be merged in */ 79 i = k; 80 i = jl[i]; /* next pivot row (== mbs for symbolic factorization) */ 81 /* printf(" next pivot row i=%d\n",i); */ 82 while (i < mbs){ 83 /* merge row i into k-th row */ 84 nzi = iu[i+1] - (iu[i]+1); 85 jmin = iu[i] + 1; jmax = iu[i] + nzi; 86 qm = k; 87 for (j=jmin; j<jmax+1; j++){ 88 vj = ju[j]; 89 do { 90 m = qm; qm = q[m]; 91 } while (qm < vj); 92 if (qm != vj){ 93 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 94 } 95 } 96 i = jl[i]; /* next pivot row */ 97 } 98 99 /* add k to row list for first nonzero element in k-th row */ 100 if (nzk > 0){ 101 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 102 jl[k] = jl[i]; jl[i] = k; 103 } 104 iu[k+1] = iu[k] + nzk; /* printf(" iu[%d]=%d, umax=%d\n", k+1, iu[k+1],umax);*/ 105 106 /* allocate more space to ju if needed */ 107 if (iu[k+1] > umax) { printf("allocate more space, iu[%d]=%d > umax=%d\n",k+1, iu[k+1],umax); 108 /* estimate how much additional space we will need */ 109 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 110 /* just double the memory each time */ 111 maxadd = umax; 112 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 113 umax += maxadd; 114 115 /* allocate a longer ju */ 116 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 117 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 118 ierr = PetscFree(ju);CHKERRQ(ierr); 119 ju = jutmp; 120 realloc++; /* count how many times we realloc */ 121 } 122 123 /* save nonzero structure of k-th row in ju */ 124 i=k; 125 jumin = juptr + 1; juptr += nzk; 126 for (j=jumin; j<juptr+1; j++){ 127 i=q[i]; 128 ju[j]=i; 129 } 130 } 131 132 if (ai[mbs] != 0) { 133 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 134 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 135 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_lu_fill %g or use \n",af); 136 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCLUSetFill(pc,%g);\n",af); 137 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 138 } else { 139 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 140 } 141 142 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 143 ierr = PetscFree(q);CHKERRQ(ierr); 144 ierr = PetscFree(jl);CHKERRQ(ierr); 145 146 /* put together the new matrix */ 147 ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr); 148 /* PetscLogObjectParent(*B,iperm); */ 149 b = (Mat_SeqSBAIJ*)(*B)->data; 150 ierr = PetscFree(b->imax);CHKERRQ(ierr); 151 b->singlemalloc = PETSC_FALSE; 152 /* the next line frees the default space generated by the Create() */ 153 ierr = PetscFree(b->a);CHKERRQ(ierr); 154 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 155 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 156 b->j = ju; 157 b->i = iu; 158 b->diag = 0; 159 b->ilen = 0; 160 b->imax = 0; 161 b->row = perm; 162 b->pivotinblocks = PETSC_FALSE; /* need to get from MatCholeskyInfo */ 163 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 164 b->icol = perm; 165 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 166 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 167 /* In b structure: Free imax, ilen, old a, old j. 168 Allocate idnew, solve_work, new a, new j */ 169 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 170 b->s_maxnz = b->s_nz = iu[mbs]; 171 172 (*B)->factor = FACTOR_CHOLESKY; 173 (*B)->info.factor_mallocs = realloc; 174 (*B)->info.fill_ratio_given = f; 175 if (ai[mbs] != 0) { 176 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 177 } else { 178 (*B)->info.fill_ratio_needed = 0.0; 179 } 180 181 if (perm_identity){ 182 switch (bs) { 183 case 1: 184 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 185 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 186 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 187 break; 188 case 2: 189 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 190 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 191 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 192 break; 193 case 3: 194 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 195 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 196 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 197 break; 198 case 4: 199 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 200 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 201 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 202 break; 203 case 5: 204 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 205 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 206 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 207 break; 208 case 6: 209 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 210 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 211 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 212 break; 213 case 7: 214 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 215 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 216 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 217 break; 218 default: 219 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 220 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 221 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 222 break; 223 } 224 } 225 226 PetscFunctionReturn(0); 227 } 228 229 230 #undef __FUNCT__ 231 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 232 int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B) 233 { 234 Mat C = *B; 235 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 236 IS perm = b->row; 237 int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 238 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 239 int bs=a->bs,bs2 = a->bs2; 240 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 241 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 242 MatScalar *work; 243 int *pivots; 244 245 PetscFunctionBegin; 246 247 /* initialization */ 248 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 249 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 250 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 251 jl = il + mbs; 252 for (i=0; i<mbs; i++) { 253 jl[i] = mbs; il[0] = 0; 254 } 255 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 256 uik = dk + bs2; 257 work = uik + bs2; 258 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 259 260 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 261 262 /* check permutation */ 263 if (!a->permute){ 264 ai = a->i; aj = a->j; aa = a->a; 265 } else { 266 ai = a->inew; aj = a->jnew; 267 ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 268 ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 269 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 270 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 271 272 for (i=0; i<mbs; i++){ 273 jmin = ai[i]; jmax = ai[i+1]; 274 for (j=jmin; j<jmax; j++){ 275 while (a2anew[j] != j){ 276 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 277 for (k1=0; k1<bs2; k1++){ 278 dk[k1] = aa[k*bs2+k1]; 279 aa[k*bs2+k1] = aa[j*bs2+k1]; 280 aa[j*bs2+k1] = dk[k1]; 281 } 282 } 283 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 284 if (i > aj[j]){ 285 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 286 ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 287 for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 288 for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 289 for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 290 } 291 } 292 } 293 } 294 ierr = PetscFree(a2anew);CHKERRQ(ierr); 295 } 296 297 /* for each row k */ 298 for (k = 0; k<mbs; k++){ 299 300 /*initialize k-th row with elements nonzero in row perm(k) of A */ 301 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 302 if (jmin < jmax) { 303 ap = aa + jmin*bs2; 304 for (j = jmin; j < jmax; j++){ 305 vj = perm_ptr[aj[j]]; /* block col. index */ 306 rtmp_ptr = rtmp + vj*bs2; 307 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 308 } 309 } 310 311 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 312 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 313 i = jl[k]; /* first row to be added to k_th row */ 314 315 while (i < mbs){ 316 nexti = jl[i]; /* next row to be added to k_th row */ 317 318 /* compute multiplier */ 319 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 320 321 /* uik = -inv(Di)*U_bar(i,k) */ 322 diag = ba + i*bs2; 323 u = ba + ili*bs2; 324 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 325 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 326 327 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 328 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 329 330 /* update -U(i,k) */ 331 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 332 333 /* add multiple of row i to k-th row ... */ 334 jmin = ili + 1; jmax = bi[i+1]; 335 if (jmin < jmax){ 336 for (j=jmin; j<jmax; j++) { 337 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 338 rtmp_ptr = rtmp + bj[j]*bs2; 339 u = ba + j*bs2; 340 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 341 } 342 343 /* ... add i to row list for next nonzero entry */ 344 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 345 j = bj[jmin]; 346 jl[i] = jl[j]; jl[j] = i; /* update jl */ 347 } 348 i = nexti; 349 } 350 351 /* save nonzero entries in k-th row of U ... */ 352 353 /* invert diagonal block */ 354 diag = ba+k*bs2; 355 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 356 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 357 358 jmin = bi[k]; jmax = bi[k+1]; 359 if (jmin < jmax) { 360 for (j=jmin; j<jmax; j++){ 361 vj = bj[j]; /* block col. index of U */ 362 u = ba + j*bs2; 363 rtmp_ptr = rtmp + vj*bs2; 364 for (k1=0; k1<bs2; k1++){ 365 *u++ = *rtmp_ptr; 366 *rtmp_ptr++ = 0.0; 367 } 368 } 369 370 /* ... add k to row list for first nonzero entry in k-th row */ 371 il[k] = jmin; 372 i = bj[jmin]; 373 jl[k] = jl[i]; jl[i] = k; 374 } 375 } 376 377 ierr = PetscFree(rtmp);CHKERRQ(ierr); 378 ierr = PetscFree(il);CHKERRQ(ierr); 379 ierr = PetscFree(dk);CHKERRQ(ierr); 380 ierr = PetscFree(pivots);CHKERRQ(ierr); 381 if (a->permute){ 382 ierr = PetscFree(aa);CHKERRQ(ierr); 383 } 384 385 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 386 C->factor = FACTOR_CHOLESKY; 387 C->assembled = PETSC_TRUE; 388 C->preallocated = PETSC_TRUE; 389 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 390 PetscFunctionReturn(0); 391 } 392 393 #undef __FUNCT__ 394 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 395 int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B) 396 { 397 Mat C = *B; 398 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 399 int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 400 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 401 int bs=a->bs,bs2 = a->bs2; 402 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 403 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 404 MatScalar *work; 405 int *pivots; 406 407 PetscFunctionBegin; 408 409 /* initialization */ 410 411 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 412 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 413 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 414 jl = il + mbs; 415 for (i=0; i<mbs; i++) { 416 jl[i] = mbs; il[0] = 0; 417 } 418 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 419 uik = dk + bs2; 420 work = uik + bs2; 421 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 422 423 ai = a->i; aj = a->j; aa = a->a; 424 425 /* for each row k */ 426 for (k = 0; k<mbs; k++){ 427 428 /*initialize k-th row with elements nonzero in row k of A */ 429 jmin = ai[k]; jmax = ai[k+1]; 430 if (jmin < jmax) { 431 ap = aa + jmin*bs2; 432 for (j = jmin; j < jmax; j++){ 433 vj = aj[j]; /* block col. index */ 434 rtmp_ptr = rtmp + vj*bs2; 435 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 436 } 437 } 438 439 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 440 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 441 i = jl[k]; /* first row to be added to k_th row */ 442 443 while (i < mbs){ 444 nexti = jl[i]; /* next row to be added to k_th row */ 445 446 /* compute multiplier */ 447 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 448 449 /* uik = -inv(Di)*U_bar(i,k) */ 450 diag = ba + i*bs2; 451 u = ba + ili*bs2; 452 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 453 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 454 455 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 456 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 457 458 /* update -U(i,k) */ 459 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 460 461 /* add multiple of row i to k-th row ... */ 462 jmin = ili + 1; jmax = bi[i+1]; 463 if (jmin < jmax){ 464 for (j=jmin; j<jmax; j++) { 465 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 466 rtmp_ptr = rtmp + bj[j]*bs2; 467 u = ba + j*bs2; 468 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 469 } 470 471 /* ... add i to row list for next nonzero entry */ 472 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 473 j = bj[jmin]; 474 jl[i] = jl[j]; jl[j] = i; /* update jl */ 475 } 476 i = nexti; 477 } 478 479 /* save nonzero entries in k-th row of U ... */ 480 481 /* invert diagonal block */ 482 diag = ba+k*bs2; 483 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 484 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 485 486 jmin = bi[k]; jmax = bi[k+1]; 487 if (jmin < jmax) { 488 for (j=jmin; j<jmax; j++){ 489 vj = bj[j]; /* block col. index of U */ 490 u = ba + j*bs2; 491 rtmp_ptr = rtmp + vj*bs2; 492 for (k1=0; k1<bs2; k1++){ 493 *u++ = *rtmp_ptr; 494 *rtmp_ptr++ = 0.0; 495 } 496 } 497 498 /* ... add k to row list for first nonzero entry in k-th row */ 499 il[k] = jmin; 500 i = bj[jmin]; 501 jl[k] = jl[i]; jl[i] = k; 502 } 503 } 504 505 ierr = PetscFree(rtmp);CHKERRQ(ierr); 506 ierr = PetscFree(il);CHKERRQ(ierr); 507 ierr = PetscFree(dk);CHKERRQ(ierr); 508 ierr = PetscFree(pivots);CHKERRQ(ierr); 509 510 C->factor = FACTOR_CHOLESKY; 511 C->assembled = PETSC_TRUE; 512 C->preallocated = PETSC_TRUE; 513 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 514 PetscFunctionReturn(0); 515 } 516 517 /* 518 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 519 Version for blocks 2 by 2. 520 */ 521 #undef __FUNCT__ 522 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 523 int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B) 524 { 525 Mat C = *B; 526 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 527 IS perm = b->row; 528 int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 529 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 530 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 531 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 532 533 PetscFunctionBegin; 534 535 /* initialization */ 536 /* il and jl record the first nonzero element in each row of the accessing 537 window U(0:k, k:mbs-1). 538 jl: list of rows to be added to uneliminated rows 539 i>= k: jl(i) is the first row to be added to row i 540 i< k: jl(i) is the row following row i in some list of rows 541 jl(i) = mbs indicates the end of a list 542 il(i): points to the first nonzero element in columns k,...,mbs-1 of 543 row i of U */ 544 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 545 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 546 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 547 jl = il + mbs; 548 for (i=0; i<mbs; i++) { 549 jl[i] = mbs; il[0] = 0; 550 } 551 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 552 uik = dk + 4; 553 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 554 555 /* check permutation */ 556 if (!a->permute){ 557 ai = a->i; aj = a->j; aa = a->a; 558 } else { 559 ai = a->inew; aj = a->jnew; 560 ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 561 ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 562 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 563 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 564 565 for (i=0; i<mbs; i++){ 566 jmin = ai[i]; jmax = ai[i+1]; 567 for (j=jmin; j<jmax; j++){ 568 while (a2anew[j] != j){ 569 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 570 for (k1=0; k1<4; k1++){ 571 dk[k1] = aa[k*4+k1]; 572 aa[k*4+k1] = aa[j*4+k1]; 573 aa[j*4+k1] = dk[k1]; 574 } 575 } 576 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 577 if (i > aj[j]){ 578 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 579 ap = aa + j*4; /* ptr to the beginning of the block */ 580 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 581 ap[1] = ap[2]; 582 ap[2] = dk[1]; 583 } 584 } 585 } 586 ierr = PetscFree(a2anew);CHKERRQ(ierr); 587 } 588 589 /* for each row k */ 590 for (k = 0; k<mbs; k++){ 591 592 /*initialize k-th row with elements nonzero in row perm(k) of A */ 593 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 594 if (jmin < jmax) { 595 ap = aa + jmin*4; 596 for (j = jmin; j < jmax; j++){ 597 vj = perm_ptr[aj[j]]; /* block col. index */ 598 rtmp_ptr = rtmp + vj*4; 599 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 600 } 601 } 602 603 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 604 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 605 i = jl[k]; /* first row to be added to k_th row */ 606 607 while (i < mbs){ 608 nexti = jl[i]; /* next row to be added to k_th row */ 609 610 /* compute multiplier */ 611 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 612 613 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 614 diag = ba + i*4; 615 u = ba + ili*4; 616 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 617 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 618 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 619 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 620 621 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 622 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 623 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 624 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 625 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 626 627 /* update -U(i,k): ba[ili] = uik */ 628 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 629 630 /* add multiple of row i to k-th row ... */ 631 jmin = ili + 1; jmax = bi[i+1]; 632 if (jmin < jmax){ 633 for (j=jmin; j<jmax; j++) { 634 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 635 rtmp_ptr = rtmp + bj[j]*4; 636 u = ba + j*4; 637 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 638 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 639 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 640 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 641 } 642 643 /* ... add i to row list for next nonzero entry */ 644 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 645 j = bj[jmin]; 646 jl[i] = jl[j]; jl[j] = i; /* update jl */ 647 } 648 i = nexti; 649 } 650 651 /* save nonzero entries in k-th row of U ... */ 652 653 /* invert diagonal block */ 654 diag = ba+k*4; 655 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 656 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 657 658 jmin = bi[k]; jmax = bi[k+1]; 659 if (jmin < jmax) { 660 for (j=jmin; j<jmax; j++){ 661 vj = bj[j]; /* block col. index of U */ 662 u = ba + j*4; 663 rtmp_ptr = rtmp + vj*4; 664 for (k1=0; k1<4; k1++){ 665 *u++ = *rtmp_ptr; 666 *rtmp_ptr++ = 0.0; 667 } 668 } 669 670 /* ... add k to row list for first nonzero entry in k-th row */ 671 il[k] = jmin; 672 i = bj[jmin]; 673 jl[k] = jl[i]; jl[i] = k; 674 } 675 } 676 677 ierr = PetscFree(rtmp);CHKERRQ(ierr); 678 ierr = PetscFree(il);CHKERRQ(ierr); 679 ierr = PetscFree(dk);CHKERRQ(ierr); 680 if (a->permute) { 681 ierr = PetscFree(aa);CHKERRQ(ierr); 682 } 683 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 684 C->factor = FACTOR_CHOLESKY; 685 C->assembled = PETSC_TRUE; 686 C->preallocated = PETSC_TRUE; 687 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 688 PetscFunctionReturn(0); 689 } 690 691 /* 692 Version for when blocks are 2 by 2 Using natural ordering 693 */ 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 696 int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B) 697 { 698 Mat C = *B; 699 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 700 int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 701 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 702 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 703 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 704 705 PetscFunctionBegin; 706 707 /* initialization */ 708 /* il and jl record the first nonzero element in each row of the accessing 709 window U(0:k, k:mbs-1). 710 jl: list of rows to be added to uneliminated rows 711 i>= k: jl(i) is the first row to be added to row i 712 i< k: jl(i) is the row following row i in some list of rows 713 jl(i) = mbs indicates the end of a list 714 il(i): points to the first nonzero element in columns k,...,mbs-1 of 715 row i of U */ 716 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 717 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 718 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 719 jl = il + mbs; 720 for (i=0; i<mbs; i++) { 721 jl[i] = mbs; il[0] = 0; 722 } 723 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 724 uik = dk + 4; 725 726 ai = a->i; aj = a->j; aa = a->a; 727 728 /* for each row k */ 729 for (k = 0; k<mbs; k++){ 730 731 /*initialize k-th row with elements nonzero in row k of A */ 732 jmin = ai[k]; jmax = ai[k+1]; 733 if (jmin < jmax) { 734 ap = aa + jmin*4; 735 for (j = jmin; j < jmax; j++){ 736 vj = aj[j]; /* block col. index */ 737 rtmp_ptr = rtmp + vj*4; 738 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 739 } 740 } 741 742 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 743 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 744 i = jl[k]; /* first row to be added to k_th row */ 745 746 while (i < mbs){ 747 nexti = jl[i]; /* next row to be added to k_th row */ 748 749 /* compute multiplier */ 750 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 751 752 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 753 diag = ba + i*4; 754 u = ba + ili*4; 755 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 756 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 757 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 758 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 759 760 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 761 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 762 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 763 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 764 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 765 766 /* update -U(i,k): ba[ili] = uik */ 767 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 768 769 /* add multiple of row i to k-th row ... */ 770 jmin = ili + 1; jmax = bi[i+1]; 771 if (jmin < jmax){ 772 for (j=jmin; j<jmax; j++) { 773 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 774 rtmp_ptr = rtmp + bj[j]*4; 775 u = ba + j*4; 776 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 777 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 778 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 779 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 780 } 781 782 /* ... add i to row list for next nonzero entry */ 783 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 784 j = bj[jmin]; 785 jl[i] = jl[j]; jl[j] = i; /* update jl */ 786 } 787 i = nexti; 788 } 789 790 /* save nonzero entries in k-th row of U ... */ 791 792 /* invert diagonal block */ 793 diag = ba+k*4; 794 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 795 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 796 797 jmin = bi[k]; jmax = bi[k+1]; 798 if (jmin < jmax) { 799 for (j=jmin; j<jmax; j++){ 800 vj = bj[j]; /* block col. index of U */ 801 u = ba + j*4; 802 rtmp_ptr = rtmp + vj*4; 803 for (k1=0; k1<4; k1++){ 804 *u++ = *rtmp_ptr; 805 *rtmp_ptr++ = 0.0; 806 } 807 } 808 809 /* ... add k to row list for first nonzero entry in k-th row */ 810 il[k] = jmin; 811 i = bj[jmin]; 812 jl[k] = jl[i]; jl[i] = k; 813 } 814 } 815 816 ierr = PetscFree(rtmp);CHKERRQ(ierr); 817 ierr = PetscFree(il);CHKERRQ(ierr); 818 ierr = PetscFree(dk);CHKERRQ(ierr); 819 820 C->factor = FACTOR_CHOLESKY; 821 C->assembled = PETSC_TRUE; 822 C->preallocated = PETSC_TRUE; 823 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 824 PetscFunctionReturn(0); 825 } 826 827 /* 828 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 829 Version for blocks are 1 by 1. 830 */ 831 #undef __FUNCT__ 832 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1" 833 int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B) 834 { 835 Mat C = *B; 836 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 837 IS ip = b->row; 838 int *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 839 int *ai,*aj,*r; 840 int k,jmin,jmax,*jl,*il,vj,nexti,ili; 841 MatScalar *rtmp; 842 MatScalar *ba = b->a,*aa,ak; 843 MatScalar dk,uikdi; 844 845 PetscFunctionBegin; 846 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 847 if (!a->permute){ 848 ai = a->i; aj = a->j; aa = a->a; 849 } else { 850 ai = a->inew; aj = a->jnew; 851 ierr = PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 852 ierr = PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 853 ierr = PetscMalloc(ai[mbs]*sizeof(int),&r);CHKERRQ(ierr); 854 ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 855 856 jmin = ai[0]; jmax = ai[mbs]; 857 for (j=jmin; j<jmax; j++){ 858 while (r[j] != j){ 859 k = r[j]; r[j] = r[k]; r[k] = k; 860 ak = aa[k]; aa[k] = aa[j]; aa[j] = ak; 861 } 862 } 863 ierr = PetscFree(r);CHKERRQ(ierr); 864 } 865 866 /* initialization */ 867 /* il and jl record the first nonzero element in each row of the accessing 868 window U(0:k, k:mbs-1). 869 jl: list of rows to be added to uneliminated rows 870 i>= k: jl(i) is the first row to be added to row i 871 i< k: jl(i) is the row following row i in some list of rows 872 jl(i) = mbs indicates the end of a list 873 il(i): points to the first nonzero element in columns k,...,mbs-1 of 874 row i of U */ 875 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 876 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 877 jl = il + mbs; 878 for (i=0; i<mbs; i++) { 879 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 880 } 881 882 /* for each row k */ 883 for (k = 0; k<mbs; k++){ 884 885 /*initialize k-th row with elements nonzero in row perm(k) of A */ 886 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 887 if (jmin < jmax) { 888 for (j = jmin; j < jmax; j++){ 889 vj = rip[aj[j]]; 890 rtmp[vj] = aa[j]; 891 } 892 } 893 894 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 895 dk = rtmp[k]; 896 i = jl[k]; /* first row to be added to k_th row */ 897 898 while (i < mbs){ 899 nexti = jl[i]; /* next row to be added to k_th row */ 900 901 /* compute multiplier, update D(k) and U(i,k) */ 902 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 903 uikdi = - ba[ili]*ba[i]; 904 dk += uikdi*ba[ili]; 905 ba[ili] = uikdi; /* -U(i,k) */ 906 907 /* add multiple of row i to k-th row ... */ 908 jmin = ili + 1; jmax = bi[i+1]; 909 if (jmin < jmax){ 910 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 911 /* ... add i to row list for next nonzero entry */ 912 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 913 j = bj[jmin]; 914 jl[i] = jl[j]; jl[j] = i; /* update jl */ 915 } 916 i = nexti; 917 } 918 919 /* check for zero pivot and save diagoanl element */ 920 if (dk == 0.0){ 921 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 922 /* 923 }else if (PetscRealPart(dk) < 0){ 924 ierr = PetscPrintf(PETSC_COMM_SELF,"Negative pivot: d[%d] = %g\n",k,dk); 925 */ 926 } 927 928 /* save nonzero entries in k-th row of U ... */ 929 ba[k] = 1.0/dk; 930 jmin = bi[k]; jmax = bi[k+1]; 931 if (jmin < jmax) { 932 for (j=jmin; j<jmax; j++){ 933 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 934 } 935 /* ... add k to row list for first nonzero entry in k-th row */ 936 il[k] = jmin; 937 i = bj[jmin]; 938 jl[k] = jl[i]; jl[i] = k; 939 } 940 } 941 942 ierr = PetscFree(rtmp);CHKERRQ(ierr); 943 ierr = PetscFree(il);CHKERRQ(ierr); 944 if (a->permute){ 945 ierr = PetscFree(aa);CHKERRQ(ierr); 946 } 947 948 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 949 C->factor = FACTOR_CHOLESKY; 950 C->assembled = PETSC_TRUE; 951 C->preallocated = PETSC_TRUE; 952 PetscLogFlops(b->mbs); 953 PetscFunctionReturn(0); 954 } 955 956 /* 957 Version for when blocks are 1 by 1 Using natural ordering 958 */ 959 #undef __FUNCT__ 960 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 961 int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B) 962 { 963 Mat C = *B; 964 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 965 int ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 966 int *ai,*aj; 967 int k,jmin,jmax,*jl,*il,vj,nexti,ili; 968 MatScalar *rtmp,*ba = b->a,*aa,dk,uikdi; 969 970 PetscFunctionBegin; 971 972 /* initialization */ 973 /* il and jl record the first nonzero element in each row of the accessing 974 window U(0:k, k:mbs-1). 975 jl: list of rows to be added to uneliminated rows 976 i>= k: jl(i) is the first row to be added to row i 977 i< k: jl(i) is the row following row i in some list of rows 978 jl(i) = mbs indicates the end of a list 979 il(i): points to the first nonzero element in columns k,...,mbs-1 of 980 row i of U */ 981 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 982 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 983 jl = il + mbs; 984 for (i=0; i<mbs; i++) { 985 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 986 } 987 988 ai = a->i; aj = a->j; aa = a->a; 989 990 /* for each row k */ 991 for (k = 0; k<mbs; k++){ 992 993 /*initialize k-th row with elements nonzero in row perm(k) of A */ 994 jmin = ai[k]; jmax = ai[k+1]; 995 if (jmin < jmax) { 996 for (j = jmin; j < jmax; j++){ 997 vj = aj[j]; 998 rtmp[vj] = aa[j]; 999 } 1000 } 1001 1002 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1003 dk = rtmp[k]; 1004 i = jl[k]; /* first row to be added to k_th row */ 1005 1006 while (i < mbs){ 1007 nexti = jl[i]; /* next row to be added to k_th row */ 1008 1009 /* compute multiplier, update D(k) and U(i,k) */ 1010 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1011 uikdi = - ba[ili]*ba[i]; 1012 dk += uikdi*ba[ili]; 1013 ba[ili] = uikdi; /* -U(i,k) */ 1014 1015 /* add multiple of row i to k-th row ... */ 1016 jmin = ili + 1; jmax = bi[i+1]; 1017 if (jmin < jmax){ 1018 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1019 /* ... add i to row list for next nonzero entry */ 1020 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1021 j = bj[jmin]; 1022 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1023 } 1024 i = nexti; 1025 } 1026 1027 /* check for zero pivot and save diagoanl element */ 1028 if (dk == 0.0){ 1029 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 1030 /* 1031 }else if (PetscRealPart(dk) < 0){ 1032 ierr = PetscPrintf(PETSC_COMM_SELF,"Negative pivot: d[%d] = %g\n",k,dk); 1033 */ 1034 } 1035 1036 /* save nonzero entries in k-th row of U ... */ 1037 ba[k] = 1.0/dk; 1038 jmin = bi[k]; jmax = bi[k+1]; 1039 if (jmin < jmax) { 1040 for (j=jmin; j<jmax; j++){ 1041 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 1042 } 1043 /* ... add k to row list for first nonzero entry in k-th row */ 1044 il[k] = jmin; 1045 i = bj[jmin]; 1046 jl[k] = jl[i]; jl[i] = k; 1047 } 1048 } 1049 1050 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1051 ierr = PetscFree(il);CHKERRQ(ierr); 1052 1053 C->factor = FACTOR_CHOLESKY; 1054 C->assembled = PETSC_TRUE; 1055 C->preallocated = PETSC_TRUE; 1056 PetscLogFlops(b->mbs); 1057 PetscFunctionReturn(0); 1058 } 1059 1060 #undef __FUNCT__ 1061 #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ" 1062 int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,PetscReal f) 1063 { 1064 int ierr; 1065 Mat C; 1066 1067 PetscFunctionBegin; 1068 ierr = MatCholeskyFactorSymbolic(A,perm,f,&C);CHKERRQ(ierr); 1069 ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr); 1070 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1071 PetscFunctionReturn(0); 1072 } 1073 1074 1075