1 /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/ 2 /* Using Modified Sparse Row (MSR) storage. 3 See page 85, "Iterative Methods ..." by Saad. */ 4 5 /* 6 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 7 */ 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 #include "src/vec/vecimpl.h" 11 #include "src/inline/ilu.h" 12 #include "include/petscis.h" 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 16 int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,PetscReal f,Mat *B) 17 { 18 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 19 int *rip,ierr,i,mbs = a->mbs,*ai,*aj; 20 int *jutmp,bs = a->bs,bs2=a->bs2; 21 int m,nzi,realloc = 0; 22 int *jl,*q,jumin,jmin,jmax,juptr,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 23 PetscTruth perm_identity; 24 25 PetscFunctionBegin; 26 27 /* check whether perm is the identity mapping */ 28 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 29 if (!perm_identity) a->permute = PETSC_TRUE; 30 31 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 32 33 if (perm_identity){ /* without permutation */ 34 ai = a->i; aj = a->j; 35 } else { /* non-trivial permutation */ 36 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 37 ai = a->inew; aj = a->jnew; 38 } 39 40 /* initialization */ 41 /* Don't know how many column pointers are needed so estimate. 42 Use Modified Sparse Row storage for u and ju, see Sasd pp.85 */ 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 44 umax = (int)(f*ai[mbs] + 1); umax += mbs + 1; 45 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 46 iu[0] = mbs+1; 47 juptr = mbs; 48 ierr = PetscMalloc(mbs*sizeof(int),&jl);CHKERRQ(ierr); 49 ierr = PetscMalloc(mbs*sizeof(int),&q);CHKERRQ(ierr); 50 for (i=0; i<mbs; i++){ 51 jl[i] = mbs; q[i] = 0; 52 } 53 54 /* for each row k */ 55 for (k=0; k<mbs; k++){ 56 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 57 q[k] = mbs; 58 /* initialize nonzero structure of k-th row to row rip[k] of A */ 59 jmin = ai[rip[k]]; 60 jmax = ai[rip[k]+1]; 61 for (j=jmin; j<jmax; j++){ 62 vj = rip[aj[j]]; /* col. value */ 63 if(vj > k){ 64 qm = k; 65 do { 66 m = qm; qm = q[m]; 67 } while(qm < vj); 68 if (qm == vj) { 69 printf(" error: duplicate entry in A\n"); break; 70 } 71 nzk++; 72 q[m] = vj; 73 q[vj] = qm; 74 } /* if(vj > k) */ 75 } /* for (j=jmin; j<jmax; j++) */ 76 77 /* modify nonzero structure of k-th row by computing fill-in 78 for each row i to be merged in */ 79 i = k; 80 i = jl[i]; /* next pivot row (== mbs for symbolic factorization) */ 81 /* printf(" next pivot row i=%d\n",i); */ 82 while (i < mbs){ 83 /* merge row i into k-th row */ 84 nzi = iu[i+1] - (iu[i]+1); 85 jmin = iu[i] + 1; jmax = iu[i] + nzi; 86 qm = k; 87 for (j=jmin; j<jmax+1; j++){ 88 vj = ju[j]; 89 do { 90 m = qm; qm = q[m]; 91 } while (qm < vj); 92 if (qm != vj){ 93 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 94 } 95 } 96 i = jl[i]; /* next pivot row */ 97 } 98 99 /* add k to row list for first nonzero element in k-th row */ 100 if (nzk > 0){ 101 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 102 jl[k] = jl[i]; jl[i] = k; 103 } 104 iu[k+1] = iu[k] + nzk; /* printf(" iu[%d]=%d, umax=%d\n", k+1, iu[k+1],umax);*/ 105 106 /* allocate more space to ju if needed */ 107 if (iu[k+1] > umax) { printf("allocate more space, iu[%d]=%d > umax=%d\n",k+1, iu[k+1],umax); 108 /* estimate how much additional space we will need */ 109 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 110 /* just double the memory each time */ 111 maxadd = umax; 112 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 113 umax += maxadd; 114 115 /* allocate a longer ju */ 116 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 117 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 118 ierr = PetscFree(ju);CHKERRQ(ierr); 119 ju = jutmp; 120 realloc++; /* count how many times we realloc */ 121 } 122 123 /* save nonzero structure of k-th row in ju */ 124 i=k; 125 jumin = juptr + 1; juptr += nzk; 126 for (j=jumin; j<juptr+1; j++){ 127 i=q[i]; 128 ju[j]=i; 129 } 130 } 131 132 if (ai[mbs] != 0) { 133 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 134 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 135 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_lu_fill %g or use \n",af); 136 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCLUSetFill(pc,%g);\n",af); 137 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 138 } else { 139 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 140 } 141 142 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 143 ierr = PetscFree(q);CHKERRQ(ierr); 144 ierr = PetscFree(jl);CHKERRQ(ierr); 145 146 /* put together the new matrix */ 147 ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr); 148 /* PetscLogObjectParent(*B,iperm); */ 149 b = (Mat_SeqSBAIJ*)(*B)->data; 150 ierr = PetscFree(b->imax);CHKERRQ(ierr); 151 b->singlemalloc = PETSC_FALSE; 152 /* the next line frees the default space generated by the Create() */ 153 ierr = PetscFree(b->a);CHKERRQ(ierr); 154 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 155 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 156 b->j = ju; 157 b->i = iu; 158 b->diag = 0; 159 b->ilen = 0; 160 b->imax = 0; 161 b->row = perm; 162 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 163 b->icol = perm; 164 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 165 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 166 /* In b structure: Free imax, ilen, old a, old j. 167 Allocate idnew, solve_work, new a, new j */ 168 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 169 b->s_maxnz = b->s_nz = iu[mbs]; 170 171 (*B)->factor = FACTOR_CHOLESKY; 172 (*B)->info.factor_mallocs = realloc; 173 (*B)->info.fill_ratio_given = f; 174 if (ai[mbs] != 0) { 175 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 176 } else { 177 (*B)->info.fill_ratio_needed = 0.0; 178 } 179 180 if (perm_identity){ 181 switch (bs) { 182 case 1: 183 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 184 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 185 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 186 break; 187 case 2: 188 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 189 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 190 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 191 break; 192 case 3: 193 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 194 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 195 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 196 break; 197 case 4: 198 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 199 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 200 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 201 break; 202 case 5: 203 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 204 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 205 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 206 break; 207 case 6: 208 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 209 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 210 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 211 break; 212 case 7: 213 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 214 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 215 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 216 break; 217 default: 218 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 219 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 220 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 221 break; 222 } 223 } 224 225 PetscFunctionReturn(0); 226 } 227 228 229 #undef __FUNCT__ 230 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 231 int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B) 232 { 233 Mat C = *B; 234 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 235 IS perm = b->row; 236 int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 237 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 238 int bs=a->bs,bs2 = a->bs2; 239 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 240 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 241 MatScalar *work; 242 int *pivots; 243 244 PetscFunctionBegin; 245 246 /* initialization */ 247 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 248 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 249 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 250 jl = il + mbs; 251 for (i=0; i<mbs; i++) { 252 jl[i] = mbs; il[0] = 0; 253 } 254 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 255 uik = dk + bs2; 256 work = uik + bs2; 257 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 258 259 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 260 261 /* check permutation */ 262 if (!a->permute){ 263 ai = a->i; aj = a->j; aa = a->a; 264 } else { 265 ai = a->inew; aj = a->jnew; 266 ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 267 ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 268 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 269 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 270 271 for (i=0; i<mbs; i++){ 272 jmin = ai[i]; jmax = ai[i+1]; 273 for (j=jmin; j<jmax; j++){ 274 while (a2anew[j] != j){ 275 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 276 for (k1=0; k1<bs2; k1++){ 277 dk[k1] = aa[k*bs2+k1]; 278 aa[k*bs2+k1] = aa[j*bs2+k1]; 279 aa[j*bs2+k1] = dk[k1]; 280 } 281 } 282 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 283 if (i > aj[j]){ 284 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 285 ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 286 for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 287 for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 288 for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 289 } 290 } 291 } 292 } 293 ierr = PetscFree(a2anew);CHKERRQ(ierr); 294 } 295 296 /* for each row k */ 297 for (k = 0; k<mbs; k++){ 298 299 /*initialize k-th row with elements nonzero in row perm(k) of A */ 300 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 301 if (jmin < jmax) { 302 ap = aa + jmin*bs2; 303 for (j = jmin; j < jmax; j++){ 304 vj = perm_ptr[aj[j]]; /* block col. index */ 305 rtmp_ptr = rtmp + vj*bs2; 306 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 307 } 308 } 309 310 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 311 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 312 i = jl[k]; /* first row to be added to k_th row */ 313 314 while (i < mbs){ 315 nexti = jl[i]; /* next row to be added to k_th row */ 316 317 /* compute multiplier */ 318 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 319 320 /* uik = -inv(Di)*U_bar(i,k) */ 321 diag = ba + i*bs2; 322 u = ba + ili*bs2; 323 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 324 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 325 326 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 327 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 328 329 /* update -U(i,k) */ 330 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 331 332 /* add multiple of row i to k-th row ... */ 333 jmin = ili + 1; jmax = bi[i+1]; 334 if (jmin < jmax){ 335 for (j=jmin; j<jmax; j++) { 336 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 337 rtmp_ptr = rtmp + bj[j]*bs2; 338 u = ba + j*bs2; 339 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 340 } 341 342 /* ... add i to row list for next nonzero entry */ 343 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 344 j = bj[jmin]; 345 jl[i] = jl[j]; jl[j] = i; /* update jl */ 346 } 347 i = nexti; 348 } 349 350 /* save nonzero entries in k-th row of U ... */ 351 352 /* invert diagonal block */ 353 diag = ba+k*bs2; 354 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 355 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 356 357 jmin = bi[k]; jmax = bi[k+1]; 358 if (jmin < jmax) { 359 for (j=jmin; j<jmax; j++){ 360 vj = bj[j]; /* block col. index of U */ 361 u = ba + j*bs2; 362 rtmp_ptr = rtmp + vj*bs2; 363 for (k1=0; k1<bs2; k1++){ 364 *u++ = *rtmp_ptr; 365 *rtmp_ptr++ = 0.0; 366 } 367 } 368 369 /* ... add k to row list for first nonzero entry in k-th row */ 370 il[k] = jmin; 371 i = bj[jmin]; 372 jl[k] = jl[i]; jl[i] = k; 373 } 374 } 375 376 ierr = PetscFree(rtmp);CHKERRQ(ierr); 377 ierr = PetscFree(il);CHKERRQ(ierr); 378 ierr = PetscFree(dk);CHKERRQ(ierr); 379 ierr = PetscFree(pivots);CHKERRQ(ierr); 380 if (a->permute){ 381 ierr = PetscFree(aa);CHKERRQ(ierr); 382 } 383 384 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 385 C->factor = FACTOR_CHOLESKY; 386 C->assembled = PETSC_TRUE; 387 C->preallocated = PETSC_TRUE; 388 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 389 PetscFunctionReturn(0); 390 } 391 392 #undef __FUNCT__ 393 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 394 int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B) 395 { 396 Mat C = *B; 397 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 398 int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 399 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 400 int bs=a->bs,bs2 = a->bs2; 401 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 402 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 403 MatScalar *work; 404 int *pivots; 405 406 PetscFunctionBegin; 407 408 /* initialization */ 409 410 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 411 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 412 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 413 jl = il + mbs; 414 for (i=0; i<mbs; i++) { 415 jl[i] = mbs; il[0] = 0; 416 } 417 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 418 uik = dk + bs2; 419 work = uik + bs2; 420 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 421 422 ai = a->i; aj = a->j; aa = a->a; 423 424 /* for each row k */ 425 for (k = 0; k<mbs; k++){ 426 427 /*initialize k-th row with elements nonzero in row k of A */ 428 jmin = ai[k]; jmax = ai[k+1]; 429 if (jmin < jmax) { 430 ap = aa + jmin*bs2; 431 for (j = jmin; j < jmax; j++){ 432 vj = aj[j]; /* block col. index */ 433 rtmp_ptr = rtmp + vj*bs2; 434 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 435 } 436 } 437 438 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 439 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 440 i = jl[k]; /* first row to be added to k_th row */ 441 442 while (i < mbs){ 443 nexti = jl[i]; /* next row to be added to k_th row */ 444 445 /* compute multiplier */ 446 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 447 448 /* uik = -inv(Di)*U_bar(i,k) */ 449 diag = ba + i*bs2; 450 u = ba + ili*bs2; 451 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 452 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 453 454 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 455 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 456 457 /* update -U(i,k) */ 458 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 459 460 /* add multiple of row i to k-th row ... */ 461 jmin = ili + 1; jmax = bi[i+1]; 462 if (jmin < jmax){ 463 for (j=jmin; j<jmax; j++) { 464 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 465 rtmp_ptr = rtmp + bj[j]*bs2; 466 u = ba + j*bs2; 467 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 468 } 469 470 /* ... add i to row list for next nonzero entry */ 471 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 472 j = bj[jmin]; 473 jl[i] = jl[j]; jl[j] = i; /* update jl */ 474 } 475 i = nexti; 476 } 477 478 /* save nonzero entries in k-th row of U ... */ 479 480 /* invert diagonal block */ 481 diag = ba+k*bs2; 482 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 483 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 484 485 jmin = bi[k]; jmax = bi[k+1]; 486 if (jmin < jmax) { 487 for (j=jmin; j<jmax; j++){ 488 vj = bj[j]; /* block col. index of U */ 489 u = ba + j*bs2; 490 rtmp_ptr = rtmp + vj*bs2; 491 for (k1=0; k1<bs2; k1++){ 492 *u++ = *rtmp_ptr; 493 *rtmp_ptr++ = 0.0; 494 } 495 } 496 497 /* ... add k to row list for first nonzero entry in k-th row */ 498 il[k] = jmin; 499 i = bj[jmin]; 500 jl[k] = jl[i]; jl[i] = k; 501 } 502 } 503 504 ierr = PetscFree(rtmp);CHKERRQ(ierr); 505 ierr = PetscFree(il);CHKERRQ(ierr); 506 ierr = PetscFree(dk);CHKERRQ(ierr); 507 ierr = PetscFree(pivots);CHKERRQ(ierr); 508 509 C->factor = FACTOR_CHOLESKY; 510 C->assembled = PETSC_TRUE; 511 C->preallocated = PETSC_TRUE; 512 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 513 PetscFunctionReturn(0); 514 } 515 516 /* 517 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 518 Version for blocks 2 by 2. 519 */ 520 #undef __FUNCT__ 521 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 522 int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B) 523 { 524 Mat C = *B; 525 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 526 IS perm = b->row; 527 int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 528 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 529 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 530 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 531 532 PetscFunctionBegin; 533 534 /* initialization */ 535 /* il and jl record the first nonzero element in each row of the accessing 536 window U(0:k, k:mbs-1). 537 jl: list of rows to be added to uneliminated rows 538 i>= k: jl(i) is the first row to be added to row i 539 i< k: jl(i) is the row following row i in some list of rows 540 jl(i) = mbs indicates the end of a list 541 il(i): points to the first nonzero element in columns k,...,mbs-1 of 542 row i of U */ 543 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 544 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 545 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 546 jl = il + mbs; 547 for (i=0; i<mbs; i++) { 548 jl[i] = mbs; il[0] = 0; 549 } 550 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 551 uik = dk + 4; 552 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 553 554 /* check permutation */ 555 if (!a->permute){ 556 ai = a->i; aj = a->j; aa = a->a; 557 } else { 558 ai = a->inew; aj = a->jnew; 559 ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 560 ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 561 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 562 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 563 564 for (i=0; i<mbs; i++){ 565 jmin = ai[i]; jmax = ai[i+1]; 566 for (j=jmin; j<jmax; j++){ 567 while (a2anew[j] != j){ 568 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 569 for (k1=0; k1<4; k1++){ 570 dk[k1] = aa[k*4+k1]; 571 aa[k*4+k1] = aa[j*4+k1]; 572 aa[j*4+k1] = dk[k1]; 573 } 574 } 575 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 576 if (i > aj[j]){ 577 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 578 ap = aa + j*4; /* ptr to the beginning of the block */ 579 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 580 ap[1] = ap[2]; 581 ap[2] = dk[1]; 582 } 583 } 584 } 585 ierr = PetscFree(a2anew);CHKERRQ(ierr); 586 } 587 588 /* for each row k */ 589 for (k = 0; k<mbs; k++){ 590 591 /*initialize k-th row with elements nonzero in row perm(k) of A */ 592 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 593 if (jmin < jmax) { 594 ap = aa + jmin*4; 595 for (j = jmin; j < jmax; j++){ 596 vj = perm_ptr[aj[j]]; /* block col. index */ 597 rtmp_ptr = rtmp + vj*4; 598 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 599 } 600 } 601 602 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 603 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 604 i = jl[k]; /* first row to be added to k_th row */ 605 606 while (i < mbs){ 607 nexti = jl[i]; /* next row to be added to k_th row */ 608 609 /* compute multiplier */ 610 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 611 612 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 613 diag = ba + i*4; 614 u = ba + ili*4; 615 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 616 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 617 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 618 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 619 620 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 621 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 622 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 623 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 624 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 625 626 /* update -U(i,k): ba[ili] = uik */ 627 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 628 629 /* add multiple of row i to k-th row ... */ 630 jmin = ili + 1; jmax = bi[i+1]; 631 if (jmin < jmax){ 632 for (j=jmin; j<jmax; j++) { 633 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 634 rtmp_ptr = rtmp + bj[j]*4; 635 u = ba + j*4; 636 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 637 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 638 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 639 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 640 } 641 642 /* ... add i to row list for next nonzero entry */ 643 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 644 j = bj[jmin]; 645 jl[i] = jl[j]; jl[j] = i; /* update jl */ 646 } 647 i = nexti; 648 } 649 650 /* save nonzero entries in k-th row of U ... */ 651 652 /* invert diagonal block */ 653 diag = ba+k*4; 654 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 655 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 656 657 jmin = bi[k]; jmax = bi[k+1]; 658 if (jmin < jmax) { 659 for (j=jmin; j<jmax; j++){ 660 vj = bj[j]; /* block col. index of U */ 661 u = ba + j*4; 662 rtmp_ptr = rtmp + vj*4; 663 for (k1=0; k1<4; k1++){ 664 *u++ = *rtmp_ptr; 665 *rtmp_ptr++ = 0.0; 666 } 667 } 668 669 /* ... add k to row list for first nonzero entry in k-th row */ 670 il[k] = jmin; 671 i = bj[jmin]; 672 jl[k] = jl[i]; jl[i] = k; 673 } 674 } 675 676 ierr = PetscFree(rtmp);CHKERRQ(ierr); 677 ierr = PetscFree(il);CHKERRQ(ierr); 678 ierr = PetscFree(dk);CHKERRQ(ierr); 679 if (a->permute) { 680 ierr = PetscFree(aa);CHKERRQ(ierr); 681 } 682 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 683 C->factor = FACTOR_CHOLESKY; 684 C->assembled = PETSC_TRUE; 685 C->preallocated = PETSC_TRUE; 686 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 687 PetscFunctionReturn(0); 688 } 689 690 /* 691 Version for when blocks are 2 by 2 Using natural ordering 692 */ 693 #undef __FUNCT__ 694 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 695 int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B) 696 { 697 Mat C = *B; 698 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 699 int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 700 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 701 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 702 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 703 704 PetscFunctionBegin; 705 706 /* initialization */ 707 /* il and jl record the first nonzero element in each row of the accessing 708 window U(0:k, k:mbs-1). 709 jl: list of rows to be added to uneliminated rows 710 i>= k: jl(i) is the first row to be added to row i 711 i< k: jl(i) is the row following row i in some list of rows 712 jl(i) = mbs indicates the end of a list 713 il(i): points to the first nonzero element in columns k,...,mbs-1 of 714 row i of U */ 715 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 716 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 717 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 718 jl = il + mbs; 719 for (i=0; i<mbs; i++) { 720 jl[i] = mbs; il[0] = 0; 721 } 722 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 723 uik = dk + 4; 724 725 ai = a->i; aj = a->j; aa = a->a; 726 727 /* for each row k */ 728 for (k = 0; k<mbs; k++){ 729 730 /*initialize k-th row with elements nonzero in row k of A */ 731 jmin = ai[k]; jmax = ai[k+1]; 732 if (jmin < jmax) { 733 ap = aa + jmin*4; 734 for (j = jmin; j < jmax; j++){ 735 vj = aj[j]; /* block col. index */ 736 rtmp_ptr = rtmp + vj*4; 737 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 738 } 739 } 740 741 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 742 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 743 i = jl[k]; /* first row to be added to k_th row */ 744 745 while (i < mbs){ 746 nexti = jl[i]; /* next row to be added to k_th row */ 747 748 /* compute multiplier */ 749 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 750 751 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 752 diag = ba + i*4; 753 u = ba + ili*4; 754 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 755 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 756 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 757 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 758 759 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 760 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 761 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 762 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 763 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 764 765 /* update -U(i,k): ba[ili] = uik */ 766 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 767 768 /* add multiple of row i to k-th row ... */ 769 jmin = ili + 1; jmax = bi[i+1]; 770 if (jmin < jmax){ 771 for (j=jmin; j<jmax; j++) { 772 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 773 rtmp_ptr = rtmp + bj[j]*4; 774 u = ba + j*4; 775 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 776 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 777 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 778 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 779 } 780 781 /* ... add i to row list for next nonzero entry */ 782 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 783 j = bj[jmin]; 784 jl[i] = jl[j]; jl[j] = i; /* update jl */ 785 } 786 i = nexti; 787 } 788 789 /* save nonzero entries in k-th row of U ... */ 790 791 /* invert diagonal block */ 792 diag = ba+k*4; 793 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 794 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 795 796 jmin = bi[k]; jmax = bi[k+1]; 797 if (jmin < jmax) { 798 for (j=jmin; j<jmax; j++){ 799 vj = bj[j]; /* block col. index of U */ 800 u = ba + j*4; 801 rtmp_ptr = rtmp + vj*4; 802 for (k1=0; k1<4; k1++){ 803 *u++ = *rtmp_ptr; 804 *rtmp_ptr++ = 0.0; 805 } 806 } 807 808 /* ... add k to row list for first nonzero entry in k-th row */ 809 il[k] = jmin; 810 i = bj[jmin]; 811 jl[k] = jl[i]; jl[i] = k; 812 } 813 } 814 815 ierr = PetscFree(rtmp);CHKERRQ(ierr); 816 ierr = PetscFree(il);CHKERRQ(ierr); 817 ierr = PetscFree(dk);CHKERRQ(ierr); 818 819 C->factor = FACTOR_CHOLESKY; 820 C->assembled = PETSC_TRUE; 821 C->preallocated = PETSC_TRUE; 822 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 823 PetscFunctionReturn(0); 824 } 825 826 /* 827 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 828 Version for blocks are 1 by 1. 829 */ 830 #undef __FUNCT__ 831 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1" 832 int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B) 833 { 834 Mat C = *B; 835 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 836 IS ip = b->row; 837 int *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 838 int *ai,*aj,*r; 839 int k,jmin,jmax,*jl,*il,vj,nexti,ili; 840 MatScalar *rtmp; 841 MatScalar *ba = b->a,*aa,ak; 842 MatScalar dk,uikdi; 843 844 PetscFunctionBegin; 845 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 846 if (!a->permute){ 847 ai = a->i; aj = a->j; aa = a->a; 848 } else { 849 ai = a->inew; aj = a->jnew; 850 ierr = PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 851 ierr = PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 852 ierr = PetscMalloc(ai[mbs]*sizeof(int),&r);CHKERRQ(ierr); 853 ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 854 855 jmin = ai[0]; jmax = ai[mbs]; 856 for (j=jmin; j<jmax; j++){ 857 while (r[j] != j){ 858 k = r[j]; r[j] = r[k]; r[k] = k; 859 ak = aa[k]; aa[k] = aa[j]; aa[j] = ak; 860 } 861 } 862 ierr = PetscFree(r);CHKERRQ(ierr); 863 } 864 865 /* initialization */ 866 /* il and jl record the first nonzero element in each row of the accessing 867 window U(0:k, k:mbs-1). 868 jl: list of rows to be added to uneliminated rows 869 i>= k: jl(i) is the first row to be added to row i 870 i< k: jl(i) is the row following row i in some list of rows 871 jl(i) = mbs indicates the end of a list 872 il(i): points to the first nonzero element in columns k,...,mbs-1 of 873 row i of U */ 874 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 875 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 876 jl = il + mbs; 877 for (i=0; i<mbs; i++) { 878 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 879 } 880 881 /* for each row k */ 882 for (k = 0; k<mbs; k++){ 883 884 /*initialize k-th row with elements nonzero in row perm(k) of A */ 885 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 886 if (jmin < jmax) { 887 for (j = jmin; j < jmax; j++){ 888 vj = rip[aj[j]]; 889 rtmp[vj] = aa[j]; 890 } 891 } 892 893 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 894 dk = rtmp[k]; 895 i = jl[k]; /* first row to be added to k_th row */ 896 897 while (i < mbs){ 898 nexti = jl[i]; /* next row to be added to k_th row */ 899 900 /* compute multiplier, update D(k) and U(i,k) */ 901 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 902 uikdi = - ba[ili]*ba[i]; 903 dk += uikdi*ba[ili]; 904 ba[ili] = uikdi; /* -U(i,k) */ 905 906 /* add multiple of row i to k-th row ... */ 907 jmin = ili + 1; jmax = bi[i+1]; 908 if (jmin < jmax){ 909 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 910 /* ... add i to row list for next nonzero entry */ 911 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 912 j = bj[jmin]; 913 jl[i] = jl[j]; jl[j] = i; /* update jl */ 914 } 915 i = nexti; 916 } 917 918 /* check for zero pivot and save diagoanl element */ 919 if (dk == 0.0){ 920 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 921 /* 922 }else if (PetscRealPart(dk) < 0){ 923 ierr = PetscPrintf(PETSC_COMM_SELF,"Negative pivot: d[%d] = %g\n",k,dk); 924 */ 925 } 926 927 /* save nonzero entries in k-th row of U ... */ 928 ba[k] = 1.0/dk; 929 jmin = bi[k]; jmax = bi[k+1]; 930 if (jmin < jmax) { 931 for (j=jmin; j<jmax; j++){ 932 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 933 } 934 /* ... add k to row list for first nonzero entry in k-th row */ 935 il[k] = jmin; 936 i = bj[jmin]; 937 jl[k] = jl[i]; jl[i] = k; 938 } 939 } 940 941 ierr = PetscFree(rtmp);CHKERRQ(ierr); 942 ierr = PetscFree(il);CHKERRQ(ierr); 943 if (a->permute){ 944 ierr = PetscFree(aa);CHKERRQ(ierr); 945 } 946 947 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 948 C->factor = FACTOR_CHOLESKY; 949 C->assembled = PETSC_TRUE; 950 C->preallocated = PETSC_TRUE; 951 PetscLogFlops(b->mbs); 952 PetscFunctionReturn(0); 953 } 954 955 /* 956 Version for when blocks are 1 by 1 Using natural ordering 957 */ 958 #undef __FUNCT__ 959 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 960 int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B) 961 { 962 Mat C = *B; 963 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 964 int ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 965 int *ai,*aj; 966 int k,jmin,jmax,*jl,*il,vj,nexti,ili; 967 MatScalar *rtmp,*ba = b->a,*aa,dk,uikdi; 968 969 PetscFunctionBegin; 970 971 /* initialization */ 972 /* il and jl record the first nonzero element in each row of the accessing 973 window U(0:k, k:mbs-1). 974 jl: list of rows to be added to uneliminated rows 975 i>= k: jl(i) is the first row to be added to row i 976 i< k: jl(i) is the row following row i in some list of rows 977 jl(i) = mbs indicates the end of a list 978 il(i): points to the first nonzero element in columns k,...,mbs-1 of 979 row i of U */ 980 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 981 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 982 jl = il + mbs; 983 for (i=0; i<mbs; i++) { 984 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 985 } 986 987 ai = a->i; aj = a->j; aa = a->a; 988 989 /* for each row k */ 990 for (k = 0; k<mbs; k++){ 991 992 /*initialize k-th row with elements nonzero in row perm(k) of A */ 993 jmin = ai[k]; jmax = ai[k+1]; 994 if (jmin < jmax) { 995 for (j = jmin; j < jmax; j++){ 996 vj = aj[j]; 997 rtmp[vj] = aa[j]; 998 } 999 } 1000 1001 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1002 dk = rtmp[k]; 1003 i = jl[k]; /* first row to be added to k_th row */ 1004 1005 while (i < mbs){ 1006 nexti = jl[i]; /* next row to be added to k_th row */ 1007 1008 /* compute multiplier, update D(k) and U(i,k) */ 1009 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1010 uikdi = - ba[ili]*ba[i]; 1011 dk += uikdi*ba[ili]; 1012 ba[ili] = uikdi; /* -U(i,k) */ 1013 1014 /* add multiple of row i to k-th row ... */ 1015 jmin = ili + 1; jmax = bi[i+1]; 1016 if (jmin < jmax){ 1017 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1018 /* ... add i to row list for next nonzero entry */ 1019 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1020 j = bj[jmin]; 1021 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1022 } 1023 i = nexti; 1024 } 1025 1026 /* check for zero pivot and save diagoanl element */ 1027 if (dk == 0.0){ 1028 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 1029 /* 1030 }else if (PetscRealPart(dk) < 0){ 1031 ierr = PetscPrintf(PETSC_COMM_SELF,"Negative pivot: d[%d] = %g\n",k,dk); 1032 */ 1033 } 1034 1035 /* save nonzero entries in k-th row of U ... */ 1036 ba[k] = 1.0/dk; 1037 jmin = bi[k]; jmax = bi[k+1]; 1038 if (jmin < jmax) { 1039 for (j=jmin; j<jmax; j++){ 1040 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 1041 } 1042 /* ... add k to row list for first nonzero entry in k-th row */ 1043 il[k] = jmin; 1044 i = bj[jmin]; 1045 jl[k] = jl[i]; jl[i] = k; 1046 } 1047 } 1048 1049 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1050 ierr = PetscFree(il);CHKERRQ(ierr); 1051 1052 C->factor = FACTOR_CHOLESKY; 1053 C->assembled = PETSC_TRUE; 1054 C->preallocated = PETSC_TRUE; 1055 PetscLogFlops(b->mbs); 1056 PetscFunctionReturn(0); 1057 } 1058 1059 #undef __FUNCT__ 1060 #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ" 1061 int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,PetscReal f) 1062 { 1063 int ierr; 1064 Mat C; 1065 1066 PetscFunctionBegin; 1067 ierr = MatCholeskyFactorSymbolic(A,perm,f,&C);CHKERRQ(ierr); 1068 ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr); 1069 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1070 PetscFunctionReturn(0); 1071 } 1072 1073 1074