1 /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/seq/baij.h" 4 #include "src/mat/impls/sbaij/seq/sbaij.h" 5 #include "src/vec/vecimpl.h" 6 #include "src/inline/ilu.h" 7 #include "include/petscis.h" 8 9 /* 10 input: 11 A -- original matrix in SEQSBAIJ format 12 F -- symbolic or numeric factor of A 13 output: 14 F -- numeric factor of A if F is input as symbolic factor of A 15 nneg, nzero, npos: inertia of A 16 */ 17 18 #undef __FUNCT__ 19 #define __FUNCT__ "MatGetInertia_SeqSBAIJ" 20 int MatGetInertia_SeqSBAIJ(Mat A,Mat *F,int *nneig,int *nzero,int *npos) 21 { 22 PetscScalar *dd; 23 Mat_SeqSBAIJ *fact_ptr; 24 int ierr,m=A->m,i; 25 26 PetscFunctionBegin; 27 if (! (*F)->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for symfactor yet, use numfactor F"); 28 29 fact_ptr = (Mat_SeqSBAIJ*)(*F)->data; 30 dd = fact_ptr->a; 31 *nneig = 0; 32 for (i=0; i<m; i++){ 33 if (dd[i] < 0.0) (*nneig)++; 34 } 35 36 PetscFunctionReturn(0); 37 } 38 39 /* Using Modified Sparse Row (MSR) storage. 40 See page 85, "Iterative Methods ..." by Saad. */ 41 /* 42 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 43 */ 44 /* Use Modified Sparse Row storage for u and ju, see Sasd pp.85 */ 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 47 int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,PetscReal f,Mat *B) 48 { 49 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 50 int *rip,ierr,i,mbs = a->mbs,*ai,*aj; 51 int *jutmp,bs = a->bs,bs2=a->bs2; 52 int m,realloc = 0,prow; 53 int *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 54 PetscTruth perm_identity; 55 56 PetscFunctionBegin; 57 58 /* check whether perm is the identity mapping */ 59 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 60 if (!perm_identity) a->permute = PETSC_TRUE; 61 62 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 63 64 if (perm_identity){ /* without permutation */ 65 ai = a->i; aj = a->j; 66 } else { /* non-trivial permutation */ 67 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 68 ai = a->inew; aj = a->jnew; 69 } 70 71 /* initialization */ 72 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 73 umax = (int)(f*ai[mbs] + 1); umax += mbs + 1; 74 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 75 iu[0] = mbs+1; 76 juidx = mbs + 1; /* index for ju */ 77 ierr = PetscMalloc(2*mbs*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for pivot row */ 78 q = jl + mbs; /* linked list for col index */ 79 for (i=0; i<mbs; i++){ 80 jl[i] = mbs; 81 q[i] = 0; 82 } 83 84 /* for each row k */ 85 for (k=0; k<mbs; k++){ 86 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 87 q[k] = mbs; 88 /* initialize nonzero structure of k-th row to row rip[k] of A */ 89 jmin = ai[rip[k]]; 90 jmax = ai[rip[k]+1]; 91 for (j=jmin; j<jmax; j++){ 92 vj = rip[aj[j]]; /* col. value */ 93 if(vj > k){ 94 qm = k; 95 do { 96 m = qm; qm = q[m]; 97 } while(qm < vj); 98 if (qm == vj) { 99 SETERRQ(1," error: duplicate entry in A\n"); 100 } 101 nzk++; 102 q[m] = vj; 103 q[vj] = qm; 104 } /* if(vj > k) */ 105 } /* for (j=jmin; j<jmax; j++) */ 106 107 /* modify nonzero structure of k-th row by computing fill-in 108 for each row i to be merged in */ 109 prow = k; 110 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 111 112 while (prow < k){ 113 /* merge row prow into k-th row */ 114 jmin = iu[prow] + 1; jmax = iu[prow+1]; 115 qm = k; 116 for (j=jmin; j<jmax; j++){ 117 vj = ju[j]; 118 do { 119 m = qm; qm = q[m]; 120 } while (qm < vj); 121 if (qm != vj){ 122 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 123 } 124 } 125 prow = jl[prow]; /* next pivot row */ 126 } 127 128 /* add k to row list for first nonzero element in k-th row */ 129 if (nzk > 0){ 130 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 131 jl[k] = jl[i]; jl[i] = k; 132 } 133 iu[k+1] = iu[k] + nzk; 134 135 /* allocate more space to ju if needed */ 136 if (iu[k+1] > umax) { 137 /* estimate how much additional space we will need */ 138 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 139 /* just double the memory each time */ 140 maxadd = umax; 141 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 142 umax += maxadd; 143 144 /* allocate a longer ju */ 145 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 146 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 147 ierr = PetscFree(ju);CHKERRQ(ierr); 148 ju = jutmp; 149 realloc++; /* count how many times we realloc */ 150 } 151 152 /* save nonzero structure of k-th row in ju */ 153 i=k; 154 while (nzk --) { 155 i = q[i]; 156 ju[juidx++] = i; 157 } 158 } 159 160 if (ai[mbs] != 0) { 161 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 162 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 163 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 164 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 165 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 166 } else { 167 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 168 } 169 170 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 171 /* ierr = PetscFree(q);CHKERRQ(ierr); */ 172 ierr = PetscFree(jl);CHKERRQ(ierr); 173 174 /* put together the new matrix */ 175 ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr); 176 /* PetscLogObjectParent(*B,iperm); */ 177 b = (Mat_SeqSBAIJ*)(*B)->data; 178 ierr = PetscFree(b->imax);CHKERRQ(ierr); 179 b->singlemalloc = PETSC_FALSE; 180 /* the next line frees the default space generated by the Create() */ 181 ierr = PetscFree(b->a);CHKERRQ(ierr); 182 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 183 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 184 b->j = ju; 185 b->i = iu; 186 b->diag = 0; 187 b->ilen = 0; 188 b->imax = 0; 189 b->row = perm; 190 b->pivotinblocks = PETSC_FALSE; /* need to get from MatCholeskyInfo */ 191 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 192 b->icol = perm; 193 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 194 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 195 /* In b structure: Free imax, ilen, old a, old j. 196 Allocate idnew, solve_work, new a, new j */ 197 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 198 b->s_maxnz = b->s_nz = iu[mbs]; 199 200 (*B)->factor = FACTOR_CHOLESKY; 201 (*B)->info.factor_mallocs = realloc; 202 (*B)->info.fill_ratio_given = f; 203 if (ai[mbs] != 0) { 204 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 205 } else { 206 (*B)->info.fill_ratio_needed = 0.0; 207 } 208 209 if (perm_identity){ 210 switch (bs) { 211 case 1: 212 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 213 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 214 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 215 break; 216 case 2: 217 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 218 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 219 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 220 break; 221 case 3: 222 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 223 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 224 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 225 break; 226 case 4: 227 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 228 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 229 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 230 break; 231 case 5: 232 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 233 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 234 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 235 break; 236 case 6: 237 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 238 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 239 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 240 break; 241 case 7: 242 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 243 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 244 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 245 break; 246 default: 247 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 248 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 249 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 250 break; 251 } 252 } 253 254 PetscFunctionReturn(0); 255 } 256 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 260 int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B) 261 { 262 Mat C = *B; 263 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 264 IS perm = b->row; 265 int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 266 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 267 int bs=a->bs,bs2 = a->bs2; 268 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 269 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 270 MatScalar *work; 271 int *pivots; 272 273 PetscFunctionBegin; 274 275 /* initialization */ 276 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 277 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 278 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 279 jl = il + mbs; 280 for (i=0; i<mbs; i++) { 281 jl[i] = mbs; il[0] = 0; 282 } 283 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 284 uik = dk + bs2; 285 work = uik + bs2; 286 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 287 288 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 289 290 /* check permutation */ 291 if (!a->permute){ 292 ai = a->i; aj = a->j; aa = a->a; 293 } else { 294 ai = a->inew; aj = a->jnew; 295 ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 296 ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 297 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 298 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 299 300 for (i=0; i<mbs; i++){ 301 jmin = ai[i]; jmax = ai[i+1]; 302 for (j=jmin; j<jmax; j++){ 303 while (a2anew[j] != j){ 304 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 305 for (k1=0; k1<bs2; k1++){ 306 dk[k1] = aa[k*bs2+k1]; 307 aa[k*bs2+k1] = aa[j*bs2+k1]; 308 aa[j*bs2+k1] = dk[k1]; 309 } 310 } 311 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 312 if (i > aj[j]){ 313 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 314 ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 315 for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 316 for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 317 for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 318 } 319 } 320 } 321 } 322 ierr = PetscFree(a2anew);CHKERRQ(ierr); 323 } 324 325 /* for each row k */ 326 for (k = 0; k<mbs; k++){ 327 328 /*initialize k-th row with elements nonzero in row perm(k) of A */ 329 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 330 331 ap = aa + jmin*bs2; 332 for (j = jmin; j < jmax; j++){ 333 vj = perm_ptr[aj[j]]; /* block col. index */ 334 rtmp_ptr = rtmp + vj*bs2; 335 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 336 } 337 338 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 339 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 340 i = jl[k]; /* first row to be added to k_th row */ 341 342 while (i < k){ 343 nexti = jl[i]; /* next row to be added to k_th row */ 344 345 /* compute multiplier */ 346 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 347 348 /* uik = -inv(Di)*U_bar(i,k) */ 349 diag = ba + i*bs2; 350 u = ba + ili*bs2; 351 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 352 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 353 354 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 355 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 356 357 /* update -U(i,k) */ 358 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 359 360 /* add multiple of row i to k-th row ... */ 361 jmin = ili + 1; jmax = bi[i+1]; 362 if (jmin < jmax){ 363 for (j=jmin; j<jmax; j++) { 364 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 365 rtmp_ptr = rtmp + bj[j]*bs2; 366 u = ba + j*bs2; 367 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 368 } 369 370 /* ... add i to row list for next nonzero entry */ 371 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 372 j = bj[jmin]; 373 jl[i] = jl[j]; jl[j] = i; /* update jl */ 374 } 375 i = nexti; 376 } 377 378 /* save nonzero entries in k-th row of U ... */ 379 380 /* invert diagonal block */ 381 diag = ba+k*bs2; 382 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 383 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 384 385 jmin = bi[k]; jmax = bi[k+1]; 386 if (jmin < jmax) { 387 for (j=jmin; j<jmax; j++){ 388 vj = bj[j]; /* block col. index of U */ 389 u = ba + j*bs2; 390 rtmp_ptr = rtmp + vj*bs2; 391 for (k1=0; k1<bs2; k1++){ 392 *u++ = *rtmp_ptr; 393 *rtmp_ptr++ = 0.0; 394 } 395 } 396 397 /* ... add k to row list for first nonzero entry in k-th row */ 398 il[k] = jmin; 399 i = bj[jmin]; 400 jl[k] = jl[i]; jl[i] = k; 401 } 402 } 403 404 ierr = PetscFree(rtmp);CHKERRQ(ierr); 405 ierr = PetscFree(il);CHKERRQ(ierr); 406 ierr = PetscFree(dk);CHKERRQ(ierr); 407 ierr = PetscFree(pivots);CHKERRQ(ierr); 408 if (a->permute){ 409 ierr = PetscFree(aa);CHKERRQ(ierr); 410 } 411 412 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 413 C->factor = FACTOR_CHOLESKY; 414 C->assembled = PETSC_TRUE; 415 C->preallocated = PETSC_TRUE; 416 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 417 PetscFunctionReturn(0); 418 } 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 422 int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B) 423 { 424 Mat C = *B; 425 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 426 int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 427 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 428 int bs=a->bs,bs2 = a->bs2; 429 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 430 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 431 MatScalar *work; 432 int *pivots; 433 434 PetscFunctionBegin; 435 436 /* initialization */ 437 438 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 439 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 440 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 441 jl = il + mbs; 442 for (i=0; i<mbs; i++) { 443 jl[i] = mbs; il[0] = 0; 444 } 445 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 446 uik = dk + bs2; 447 work = uik + bs2; 448 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 449 450 ai = a->i; aj = a->j; aa = a->a; 451 452 /* for each row k */ 453 for (k = 0; k<mbs; k++){ 454 455 /*initialize k-th row with elements nonzero in row k of A */ 456 jmin = ai[k]; jmax = ai[k+1]; 457 ap = aa + jmin*bs2; 458 for (j = jmin; j < jmax; j++){ 459 vj = aj[j]; /* block col. index */ 460 rtmp_ptr = rtmp + vj*bs2; 461 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 462 } 463 464 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 465 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 466 i = jl[k]; /* first row to be added to k_th row */ 467 468 while (i < k){ 469 nexti = jl[i]; /* next row to be added to k_th row */ 470 471 /* compute multiplier */ 472 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 473 474 /* uik = -inv(Di)*U_bar(i,k) */ 475 diag = ba + i*bs2; 476 u = ba + ili*bs2; 477 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 478 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 479 480 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 481 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 482 483 /* update -U(i,k) */ 484 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 485 486 /* add multiple of row i to k-th row ... */ 487 jmin = ili + 1; jmax = bi[i+1]; 488 if (jmin < jmax){ 489 for (j=jmin; j<jmax; j++) { 490 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 491 rtmp_ptr = rtmp + bj[j]*bs2; 492 u = ba + j*bs2; 493 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 494 } 495 496 /* ... add i to row list for next nonzero entry */ 497 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 498 j = bj[jmin]; 499 jl[i] = jl[j]; jl[j] = i; /* update jl */ 500 } 501 i = nexti; 502 } 503 504 /* save nonzero entries in k-th row of U ... */ 505 506 /* invert diagonal block */ 507 diag = ba+k*bs2; 508 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 509 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 510 511 jmin = bi[k]; jmax = bi[k+1]; 512 if (jmin < jmax) { 513 for (j=jmin; j<jmax; j++){ 514 vj = bj[j]; /* block col. index of U */ 515 u = ba + j*bs2; 516 rtmp_ptr = rtmp + vj*bs2; 517 for (k1=0; k1<bs2; k1++){ 518 *u++ = *rtmp_ptr; 519 *rtmp_ptr++ = 0.0; 520 } 521 } 522 523 /* ... add k to row list for first nonzero entry in k-th row */ 524 il[k] = jmin; 525 i = bj[jmin]; 526 jl[k] = jl[i]; jl[i] = k; 527 } 528 } 529 530 ierr = PetscFree(rtmp);CHKERRQ(ierr); 531 ierr = PetscFree(il);CHKERRQ(ierr); 532 ierr = PetscFree(dk);CHKERRQ(ierr); 533 ierr = PetscFree(pivots);CHKERRQ(ierr); 534 535 C->factor = FACTOR_CHOLESKY; 536 C->assembled = PETSC_TRUE; 537 C->preallocated = PETSC_TRUE; 538 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 539 PetscFunctionReturn(0); 540 } 541 542 /* 543 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 544 Version for blocks 2 by 2. 545 */ 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 548 int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B) 549 { 550 Mat C = *B; 551 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 552 IS perm = b->row; 553 int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 554 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 555 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 556 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 557 558 PetscFunctionBegin; 559 560 /* initialization */ 561 /* il and jl record the first nonzero element in each row of the accessing 562 window U(0:k, k:mbs-1). 563 jl: list of rows to be added to uneliminated rows 564 i>= k: jl(i) is the first row to be added to row i 565 i< k: jl(i) is the row following row i in some list of rows 566 jl(i) = mbs indicates the end of a list 567 il(i): points to the first nonzero element in columns k,...,mbs-1 of 568 row i of U */ 569 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 570 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 571 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 572 jl = il + mbs; 573 for (i=0; i<mbs; i++) { 574 jl[i] = mbs; il[0] = 0; 575 } 576 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 577 uik = dk + 4; 578 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 579 580 /* check permutation */ 581 if (!a->permute){ 582 ai = a->i; aj = a->j; aa = a->a; 583 } else { 584 ai = a->inew; aj = a->jnew; 585 ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 586 ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 587 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 588 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 589 590 for (i=0; i<mbs; i++){ 591 jmin = ai[i]; jmax = ai[i+1]; 592 for (j=jmin; j<jmax; j++){ 593 while (a2anew[j] != j){ 594 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 595 for (k1=0; k1<4; k1++){ 596 dk[k1] = aa[k*4+k1]; 597 aa[k*4+k1] = aa[j*4+k1]; 598 aa[j*4+k1] = dk[k1]; 599 } 600 } 601 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 602 if (i > aj[j]){ 603 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 604 ap = aa + j*4; /* ptr to the beginning of the block */ 605 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 606 ap[1] = ap[2]; 607 ap[2] = dk[1]; 608 } 609 } 610 } 611 ierr = PetscFree(a2anew);CHKERRQ(ierr); 612 } 613 614 /* for each row k */ 615 for (k = 0; k<mbs; k++){ 616 617 /*initialize k-th row with elements nonzero in row perm(k) of A */ 618 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 619 ap = aa + jmin*4; 620 for (j = jmin; j < jmax; j++){ 621 vj = perm_ptr[aj[j]]; /* block col. index */ 622 rtmp_ptr = rtmp + vj*4; 623 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 624 } 625 626 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 627 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 628 i = jl[k]; /* first row to be added to k_th row */ 629 630 while (i < k){ 631 nexti = jl[i]; /* next row to be added to k_th row */ 632 633 /* compute multiplier */ 634 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 635 636 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 637 diag = ba + i*4; 638 u = ba + ili*4; 639 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 640 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 641 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 642 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 643 644 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 645 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 646 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 647 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 648 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 649 650 /* update -U(i,k): ba[ili] = uik */ 651 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 652 653 /* add multiple of row i to k-th row ... */ 654 jmin = ili + 1; jmax = bi[i+1]; 655 if (jmin < jmax){ 656 for (j=jmin; j<jmax; j++) { 657 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 658 rtmp_ptr = rtmp + bj[j]*4; 659 u = ba + j*4; 660 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 661 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 662 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 663 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 664 } 665 666 /* ... add i to row list for next nonzero entry */ 667 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 668 j = bj[jmin]; 669 jl[i] = jl[j]; jl[j] = i; /* update jl */ 670 } 671 i = nexti; 672 } 673 674 /* save nonzero entries in k-th row of U ... */ 675 676 /* invert diagonal block */ 677 diag = ba+k*4; 678 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 679 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 680 681 jmin = bi[k]; jmax = bi[k+1]; 682 if (jmin < jmax) { 683 for (j=jmin; j<jmax; j++){ 684 vj = bj[j]; /* block col. index of U */ 685 u = ba + j*4; 686 rtmp_ptr = rtmp + vj*4; 687 for (k1=0; k1<4; k1++){ 688 *u++ = *rtmp_ptr; 689 *rtmp_ptr++ = 0.0; 690 } 691 } 692 693 /* ... add k to row list for first nonzero entry in k-th row */ 694 il[k] = jmin; 695 i = bj[jmin]; 696 jl[k] = jl[i]; jl[i] = k; 697 } 698 } 699 700 ierr = PetscFree(rtmp);CHKERRQ(ierr); 701 ierr = PetscFree(il);CHKERRQ(ierr); 702 ierr = PetscFree(dk);CHKERRQ(ierr); 703 if (a->permute) { 704 ierr = PetscFree(aa);CHKERRQ(ierr); 705 } 706 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 707 C->factor = FACTOR_CHOLESKY; 708 C->assembled = PETSC_TRUE; 709 C->preallocated = PETSC_TRUE; 710 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 711 PetscFunctionReturn(0); 712 } 713 714 /* 715 Version for when blocks are 2 by 2 Using natural ordering 716 */ 717 #undef __FUNCT__ 718 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 719 int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B) 720 { 721 Mat C = *B; 722 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 723 int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 724 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 725 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 726 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 727 728 PetscFunctionBegin; 729 730 /* initialization */ 731 /* il and jl record the first nonzero element in each row of the accessing 732 window U(0:k, k:mbs-1). 733 jl: list of rows to be added to uneliminated rows 734 i>= k: jl(i) is the first row to be added to row i 735 i< k: jl(i) is the row following row i in some list of rows 736 jl(i) = mbs indicates the end of a list 737 il(i): points to the first nonzero element in columns k,...,mbs-1 of 738 row i of U */ 739 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 740 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 741 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 742 jl = il + mbs; 743 for (i=0; i<mbs; i++) { 744 jl[i] = mbs; il[0] = 0; 745 } 746 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 747 uik = dk + 4; 748 749 ai = a->i; aj = a->j; aa = a->a; 750 751 /* for each row k */ 752 for (k = 0; k<mbs; k++){ 753 754 /*initialize k-th row with elements nonzero in row k of A */ 755 jmin = ai[k]; jmax = ai[k+1]; 756 ap = aa + jmin*4; 757 for (j = jmin; j < jmax; j++){ 758 vj = aj[j]; /* block col. index */ 759 rtmp_ptr = rtmp + vj*4; 760 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 761 } 762 763 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 764 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 765 i = jl[k]; /* first row to be added to k_th row */ 766 767 while (i < k){ 768 nexti = jl[i]; /* next row to be added to k_th row */ 769 770 /* compute multiplier */ 771 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 772 773 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 774 diag = ba + i*4; 775 u = ba + ili*4; 776 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 777 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 778 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 779 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 780 781 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 782 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 783 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 784 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 785 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 786 787 /* update -U(i,k): ba[ili] = uik */ 788 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 789 790 /* add multiple of row i to k-th row ... */ 791 jmin = ili + 1; jmax = bi[i+1]; 792 if (jmin < jmax){ 793 for (j=jmin; j<jmax; j++) { 794 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 795 rtmp_ptr = rtmp + bj[j]*4; 796 u = ba + j*4; 797 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 798 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 799 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 800 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 801 } 802 803 /* ... add i to row list for next nonzero entry */ 804 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 805 j = bj[jmin]; 806 jl[i] = jl[j]; jl[j] = i; /* update jl */ 807 } 808 i = nexti; 809 } 810 811 /* save nonzero entries in k-th row of U ... */ 812 813 /* invert diagonal block */ 814 diag = ba+k*4; 815 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 816 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 817 818 jmin = bi[k]; jmax = bi[k+1]; 819 if (jmin < jmax) { 820 for (j=jmin; j<jmax; j++){ 821 vj = bj[j]; /* block col. index of U */ 822 u = ba + j*4; 823 rtmp_ptr = rtmp + vj*4; 824 for (k1=0; k1<4; k1++){ 825 *u++ = *rtmp_ptr; 826 *rtmp_ptr++ = 0.0; 827 } 828 } 829 830 /* ... add k to row list for first nonzero entry in k-th row */ 831 il[k] = jmin; 832 i = bj[jmin]; 833 jl[k] = jl[i]; jl[i] = k; 834 } 835 } 836 837 ierr = PetscFree(rtmp);CHKERRQ(ierr); 838 ierr = PetscFree(il);CHKERRQ(ierr); 839 ierr = PetscFree(dk);CHKERRQ(ierr); 840 841 C->factor = FACTOR_CHOLESKY; 842 C->assembled = PETSC_TRUE; 843 C->preallocated = PETSC_TRUE; 844 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 845 PetscFunctionReturn(0); 846 } 847 848 /* 849 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 850 Version for blocks are 1 by 1. 851 */ 852 #undef __FUNCT__ 853 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1" 854 int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B) 855 { 856 Mat C = *B; 857 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 858 IS ip = b->row; 859 int *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 860 int *ai,*aj,*r; 861 int k,jmin,jmax,*jl,*il,vj,nexti,ili; 862 MatScalar *rtmp; 863 MatScalar *ba = b->a,*aa,ak; 864 MatScalar dk,uikdi; 865 866 PetscFunctionBegin; 867 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 868 if (!a->permute){ 869 ai = a->i; aj = a->j; aa = a->a; 870 } else { 871 ai = a->inew; aj = a->jnew; 872 ierr = PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 873 ierr = PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 874 ierr = PetscMalloc(ai[mbs]*sizeof(int),&r);CHKERRQ(ierr); 875 ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 876 877 jmin = ai[0]; jmax = ai[mbs]; 878 for (j=jmin; j<jmax; j++){ 879 while (r[j] != j){ 880 k = r[j]; r[j] = r[k]; r[k] = k; 881 ak = aa[k]; aa[k] = aa[j]; aa[j] = ak; 882 } 883 } 884 ierr = PetscFree(r);CHKERRQ(ierr); 885 } 886 887 /* initialization */ 888 /* il and jl record the first nonzero element in each row of the accessing 889 window U(0:k, k:mbs-1). 890 jl: list of rows to be added to uneliminated rows 891 i>= k: jl(i) is the first row to be added to row i 892 i< k: jl(i) is the row following row i in some list of rows 893 jl(i) = mbs indicates the end of a list 894 il(i): points to the first nonzero element in columns k,...,mbs-1 of 895 row i of U */ 896 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 897 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 898 jl = il + mbs; 899 for (i=0; i<mbs; i++) { 900 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 901 } 902 903 /* for each row k */ 904 for (k = 0; k<mbs; k++){ 905 906 /*initialize k-th row with elements nonzero in row perm(k) of A */ 907 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 908 909 for (j = jmin; j < jmax; j++){ 910 vj = rip[aj[j]]; 911 rtmp[vj] = aa[j]; 912 } 913 914 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 915 dk = rtmp[k]; 916 i = jl[k]; /* first row to be added to k_th row */ 917 918 while (i < k){ 919 nexti = jl[i]; /* next row to be added to k_th row */ 920 921 /* compute multiplier, update D(k) and U(i,k) */ 922 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 923 uikdi = - ba[ili]*ba[i]; 924 dk += uikdi*ba[ili]; 925 ba[ili] = uikdi; /* -U(i,k) */ 926 927 /* add multiple of row i to k-th row ... */ 928 jmin = ili + 1; jmax = bi[i+1]; 929 if (jmin < jmax){ 930 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 931 /* ... add i to row list for next nonzero entry */ 932 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 933 j = bj[jmin]; 934 jl[i] = jl[j]; jl[j] = i; /* update jl */ 935 } 936 i = nexti; 937 } 938 939 /* check for zero pivot and save diagoanl element */ 940 if (dk == 0.0){ 941 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 942 /* 943 } else if (PetscRealPart(dk) < 0.0){ 944 SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk); 945 */ 946 } 947 948 /* save nonzero entries in k-th row of U ... */ 949 ba[k] = 1.0/dk; 950 jmin = bi[k]; jmax = bi[k+1]; 951 if (jmin < jmax) { 952 for (j=jmin; j<jmax; j++){ 953 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 954 } 955 /* ... add k to row list for first nonzero entry in k-th row */ 956 il[k] = jmin; 957 i = bj[jmin]; 958 jl[k] = jl[i]; jl[i] = k; 959 } 960 } 961 962 ierr = PetscFree(rtmp);CHKERRQ(ierr); 963 ierr = PetscFree(il);CHKERRQ(ierr); 964 if (a->permute){ 965 ierr = PetscFree(aa);CHKERRQ(ierr); 966 } 967 968 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 969 C->factor = FACTOR_CHOLESKY; 970 C->assembled = PETSC_TRUE; 971 C->preallocated = PETSC_TRUE; 972 PetscLogFlops(b->mbs); 973 PetscFunctionReturn(0); 974 } 975 976 /* 977 Version for when blocks are 1 by 1 Using natural ordering 978 */ 979 #undef __FUNCT__ 980 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 981 int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B) 982 { 983 Mat C = *B; 984 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 985 int ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 986 int *ai,*aj; 987 int k,jmin,jmax,*jl,*il,vj,nexti,ili; 988 MatScalar *rtmp,*ba = b->a,*aa,dk,uikdi; 989 990 PetscFunctionBegin; 991 992 /* initialization */ 993 /* il and jl record the first nonzero element in each row of the accessing 994 window U(0:k, k:mbs-1). 995 jl: list of rows to be added to uneliminated rows 996 i>= k: jl(i) is the first row to be added to row i 997 i< k: jl(i) is the row following row i in some list of rows 998 jl(i) = mbs indicates the end of a list 999 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1000 row i of U */ 1001 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1002 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 1003 jl = il + mbs; 1004 for (i=0; i<mbs; i++) { 1005 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1006 } 1007 1008 ai = a->i; aj = a->j; aa = a->a; 1009 1010 /* for each row k */ 1011 for (k = 0; k<mbs; k++){ 1012 1013 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1014 jmin = ai[k]; jmax = ai[k+1]; 1015 1016 for (j = jmin; j < jmax; j++){ 1017 vj = aj[j]; 1018 rtmp[vj] = aa[j]; 1019 } 1020 1021 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1022 dk = rtmp[k]; 1023 i = jl[k]; /* first row to be added to k_th row */ 1024 1025 while (i < k){ 1026 nexti = jl[i]; /* next row to be added to k_th row */ 1027 1028 /* compute multiplier, update D(k) and U(i,k) */ 1029 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1030 uikdi = - ba[ili]*ba[i]; 1031 dk += uikdi*ba[ili]; 1032 ba[ili] = uikdi; /* -U(i,k) */ 1033 1034 /* add multiple of row i to k-th row ... */ 1035 jmin = ili + 1; jmax = bi[i+1]; 1036 if (jmin < jmax){ 1037 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1038 /* ... add i to row list for next nonzero entry */ 1039 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1040 j = bj[jmin]; 1041 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1042 } 1043 i = nexti; 1044 } 1045 1046 /* check for zero pivot and save diagoanl element */ 1047 if (dk == 0.0){ 1048 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 1049 /* 1050 } else if (PetscRealPart(dk) < 0){ 1051 SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk); 1052 */ 1053 } 1054 1055 /* save nonzero entries in k-th row of U ... */ 1056 ba[k] = 1.0/dk; 1057 jmin = bi[k]; jmax = bi[k+1]; 1058 if (jmin < jmax) { 1059 for (j=jmin; j<jmax; j++){ 1060 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 1061 } 1062 /* ... add k to row list for first nonzero entry in k-th row */ 1063 il[k] = jmin; 1064 i = bj[jmin]; 1065 jl[k] = jl[i]; jl[i] = k; 1066 } 1067 } 1068 1069 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1070 ierr = PetscFree(il);CHKERRQ(ierr); 1071 1072 C->factor = FACTOR_CHOLESKY; 1073 C->assembled = PETSC_TRUE; 1074 C->preallocated = PETSC_TRUE; 1075 PetscLogFlops(b->mbs); 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ" 1081 int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,PetscReal f) 1082 { 1083 int ierr; 1084 Mat C; 1085 1086 PetscFunctionBegin; 1087 ierr = MatCholeskyFactorSymbolic(A,perm,f,&C);CHKERRQ(ierr); 1088 ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr); 1089 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1090 PetscFunctionReturn(0); 1091 } 1092 1093 1094