1 /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/seq/baij.h" 4 #include "src/mat/impls/sbaij/seq/sbaij.h" 5 #include "src/vec/vecimpl.h" 6 #include "src/inline/ilu.h" 7 #include "include/petscis.h" 8 9 #if !defined(PETSC_USE_COMPLEX) 10 /* 11 input: 12 F -- numeric factor 13 output: 14 nneg, nzero, npos: matrix inertia 15 */ 16 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatGetInertia_SeqSBAIJ" 19 int MatGetInertia_SeqSBAIJ(Mat F,int *nneig,int *nzero,int *npos) 20 { 21 PetscScalar *dd; 22 Mat_SeqSBAIJ *fact_ptr; 23 int m = F->m,i; 24 25 PetscFunctionBegin; 26 if (!F->assembled) { 27 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Numeric factor mat is not assembled"); 28 } 29 fact_ptr = (Mat_SeqSBAIJ*)F->data; 30 dd = fact_ptr->a; 31 *nneig = 0; 32 for (i=0; i<m; i++){ 33 if (PetscRealPart(dd[i]) < 0.0) (*nneig)++; 34 } 35 36 PetscFunctionReturn(0); 37 } 38 #endif /* !defined(PETSC_USE_COMPLEX) */ 39 40 /* Using Modified Sparse Row (MSR) storage. 41 See page 85, "Iterative Methods ..." by Saad. */ 42 /* 43 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 44 */ 45 /* Use Modified Sparse Row storage for u and ju, see Sasd pp.85 */ 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 48 int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,PetscReal f,Mat *B) 49 { 50 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 51 int *rip,ierr,i,mbs = a->mbs,*ai,*aj; 52 int *jutmp,bs = a->bs,bs2=a->bs2; 53 int m,realloc = 0,prow; 54 int *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 55 PetscTruth perm_identity; 56 57 PetscFunctionBegin; 58 59 /* check whether perm is the identity mapping */ 60 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 61 if (!perm_identity) a->permute = PETSC_TRUE; 62 63 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 64 65 if (perm_identity){ /* without permutation */ 66 ai = a->i; aj = a->j; 67 } else { /* non-trivial permutation */ 68 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 69 ai = a->inew; aj = a->jnew; 70 } 71 72 /* initialization */ 73 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 74 umax = (int)(f*ai[mbs] + 1); umax += mbs + 1; 75 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 76 iu[0] = mbs+1; 77 juidx = mbs + 1; /* index for ju */ 78 ierr = PetscMalloc(2*mbs*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for pivot row */ 79 q = jl + mbs; /* linked list for col index */ 80 for (i=0; i<mbs; i++){ 81 jl[i] = mbs; 82 q[i] = 0; 83 } 84 85 /* for each row k */ 86 for (k=0; k<mbs; k++){ 87 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 88 q[k] = mbs; 89 /* initialize nonzero structure of k-th row to row rip[k] of A */ 90 jmin = ai[rip[k]]; 91 jmax = ai[rip[k]+1]; 92 for (j=jmin; j<jmax; j++){ 93 vj = rip[aj[j]]; /* col. value */ 94 if(vj > k){ 95 qm = k; 96 do { 97 m = qm; qm = q[m]; 98 } while(qm < vj); 99 if (qm == vj) { 100 SETERRQ(1," error: duplicate entry in A\n"); 101 } 102 nzk++; 103 q[m] = vj; 104 q[vj] = qm; 105 } /* if(vj > k) */ 106 } /* for (j=jmin; j<jmax; j++) */ 107 108 /* modify nonzero structure of k-th row by computing fill-in 109 for each row i to be merged in */ 110 prow = k; 111 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 112 113 while (prow < k){ 114 /* merge row prow into k-th row */ 115 jmin = iu[prow] + 1; jmax = iu[prow+1]; 116 qm = k; 117 for (j=jmin; j<jmax; j++){ 118 vj = ju[j]; 119 do { 120 m = qm; qm = q[m]; 121 } while (qm < vj); 122 if (qm != vj){ 123 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 124 } 125 } 126 prow = jl[prow]; /* next pivot row */ 127 } 128 129 /* add k to row list for first nonzero element in k-th row */ 130 if (nzk > 0){ 131 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 132 jl[k] = jl[i]; jl[i] = k; 133 } 134 iu[k+1] = iu[k] + nzk; 135 136 /* allocate more space to ju if needed */ 137 if (iu[k+1] > umax) { 138 /* estimate how much additional space we will need */ 139 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 140 /* just double the memory each time */ 141 maxadd = umax; 142 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 143 umax += maxadd; 144 145 /* allocate a longer ju */ 146 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 147 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 148 ierr = PetscFree(ju);CHKERRQ(ierr); 149 ju = jutmp; 150 realloc++; /* count how many times we realloc */ 151 } 152 153 /* save nonzero structure of k-th row in ju */ 154 i=k; 155 while (nzk --) { 156 i = q[i]; 157 ju[juidx++] = i; 158 } 159 } 160 161 if (ai[mbs] != 0) { 162 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 163 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 164 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 165 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 166 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 167 } else { 168 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 169 } 170 171 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 172 /* ierr = PetscFree(q);CHKERRQ(ierr); */ 173 ierr = PetscFree(jl);CHKERRQ(ierr); 174 175 /* put together the new matrix */ 176 ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr); 177 /* PetscLogObjectParent(*B,iperm); */ 178 b = (Mat_SeqSBAIJ*)(*B)->data; 179 ierr = PetscFree(b->imax);CHKERRQ(ierr); 180 b->singlemalloc = PETSC_FALSE; 181 /* the next line frees the default space generated by the Create() */ 182 ierr = PetscFree(b->a);CHKERRQ(ierr); 183 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 184 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 185 b->j = ju; 186 b->i = iu; 187 b->diag = 0; 188 b->ilen = 0; 189 b->imax = 0; 190 b->row = perm; 191 b->pivotinblocks = PETSC_FALSE; /* need to get from MatCholeskyInfo */ 192 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 193 b->icol = perm; 194 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 195 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 196 /* In b structure: Free imax, ilen, old a, old j. 197 Allocate idnew, solve_work, new a, new j */ 198 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 199 b->s_maxnz = b->s_nz = iu[mbs]; 200 201 (*B)->factor = FACTOR_CHOLESKY; 202 (*B)->info.factor_mallocs = realloc; 203 (*B)->info.fill_ratio_given = f; 204 if (ai[mbs] != 0) { 205 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 206 } else { 207 (*B)->info.fill_ratio_needed = 0.0; 208 } 209 210 if (perm_identity){ 211 switch (bs) { 212 case 1: 213 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 214 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 215 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 216 break; 217 case 2: 218 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 219 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 220 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 221 break; 222 case 3: 223 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 224 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 225 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 226 break; 227 case 4: 228 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 229 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 230 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 231 break; 232 case 5: 233 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 234 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 235 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 236 break; 237 case 6: 238 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 239 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 240 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 241 break; 242 case 7: 243 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 244 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 245 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 246 break; 247 default: 248 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 249 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 250 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 251 break; 252 } 253 } 254 255 PetscFunctionReturn(0); 256 } 257 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 261 int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B) 262 { 263 Mat C = *B; 264 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 265 IS perm = b->row; 266 int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 267 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 268 int bs=a->bs,bs2 = a->bs2; 269 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 270 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 271 MatScalar *work; 272 int *pivots; 273 274 PetscFunctionBegin; 275 276 /* initialization */ 277 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 278 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 279 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 280 jl = il + mbs; 281 for (i=0; i<mbs; i++) { 282 jl[i] = mbs; il[0] = 0; 283 } 284 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 285 uik = dk + bs2; 286 work = uik + bs2; 287 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 288 289 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 290 291 /* check permutation */ 292 if (!a->permute){ 293 ai = a->i; aj = a->j; aa = a->a; 294 } else { 295 ai = a->inew; aj = a->jnew; 296 ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 297 ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 298 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 299 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 300 301 for (i=0; i<mbs; i++){ 302 jmin = ai[i]; jmax = ai[i+1]; 303 for (j=jmin; j<jmax; j++){ 304 while (a2anew[j] != j){ 305 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 306 for (k1=0; k1<bs2; k1++){ 307 dk[k1] = aa[k*bs2+k1]; 308 aa[k*bs2+k1] = aa[j*bs2+k1]; 309 aa[j*bs2+k1] = dk[k1]; 310 } 311 } 312 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 313 if (i > aj[j]){ 314 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 315 ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 316 for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 317 for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 318 for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 319 } 320 } 321 } 322 } 323 ierr = PetscFree(a2anew);CHKERRQ(ierr); 324 } 325 326 /* for each row k */ 327 for (k = 0; k<mbs; k++){ 328 329 /*initialize k-th row with elements nonzero in row perm(k) of A */ 330 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 331 332 ap = aa + jmin*bs2; 333 for (j = jmin; j < jmax; j++){ 334 vj = perm_ptr[aj[j]]; /* block col. index */ 335 rtmp_ptr = rtmp + vj*bs2; 336 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 337 } 338 339 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 340 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 341 i = jl[k]; /* first row to be added to k_th row */ 342 343 while (i < k){ 344 nexti = jl[i]; /* next row to be added to k_th row */ 345 346 /* compute multiplier */ 347 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 348 349 /* uik = -inv(Di)*U_bar(i,k) */ 350 diag = ba + i*bs2; 351 u = ba + ili*bs2; 352 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 353 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 354 355 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 356 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 357 358 /* update -U(i,k) */ 359 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 360 361 /* add multiple of row i to k-th row ... */ 362 jmin = ili + 1; jmax = bi[i+1]; 363 if (jmin < jmax){ 364 for (j=jmin; j<jmax; j++) { 365 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 366 rtmp_ptr = rtmp + bj[j]*bs2; 367 u = ba + j*bs2; 368 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 369 } 370 371 /* ... add i to row list for next nonzero entry */ 372 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 373 j = bj[jmin]; 374 jl[i] = jl[j]; jl[j] = i; /* update jl */ 375 } 376 i = nexti; 377 } 378 379 /* save nonzero entries in k-th row of U ... */ 380 381 /* invert diagonal block */ 382 diag = ba+k*bs2; 383 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 384 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 385 386 jmin = bi[k]; jmax = bi[k+1]; 387 if (jmin < jmax) { 388 for (j=jmin; j<jmax; j++){ 389 vj = bj[j]; /* block col. index of U */ 390 u = ba + j*bs2; 391 rtmp_ptr = rtmp + vj*bs2; 392 for (k1=0; k1<bs2; k1++){ 393 *u++ = *rtmp_ptr; 394 *rtmp_ptr++ = 0.0; 395 } 396 } 397 398 /* ... add k to row list for first nonzero entry in k-th row */ 399 il[k] = jmin; 400 i = bj[jmin]; 401 jl[k] = jl[i]; jl[i] = k; 402 } 403 } 404 405 ierr = PetscFree(rtmp);CHKERRQ(ierr); 406 ierr = PetscFree(il);CHKERRQ(ierr); 407 ierr = PetscFree(dk);CHKERRQ(ierr); 408 ierr = PetscFree(pivots);CHKERRQ(ierr); 409 if (a->permute){ 410 ierr = PetscFree(aa);CHKERRQ(ierr); 411 } 412 413 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 414 C->factor = FACTOR_CHOLESKY; 415 C->assembled = PETSC_TRUE; 416 C->preallocated = PETSC_TRUE; 417 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 423 int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B) 424 { 425 Mat C = *B; 426 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 427 int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 428 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 429 int bs=a->bs,bs2 = a->bs2; 430 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 431 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 432 MatScalar *work; 433 int *pivots; 434 435 PetscFunctionBegin; 436 437 /* initialization */ 438 439 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 440 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 441 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 442 jl = il + mbs; 443 for (i=0; i<mbs; i++) { 444 jl[i] = mbs; il[0] = 0; 445 } 446 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 447 uik = dk + bs2; 448 work = uik + bs2; 449 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 450 451 ai = a->i; aj = a->j; aa = a->a; 452 453 /* for each row k */ 454 for (k = 0; k<mbs; k++){ 455 456 /*initialize k-th row with elements nonzero in row k of A */ 457 jmin = ai[k]; jmax = ai[k+1]; 458 ap = aa + jmin*bs2; 459 for (j = jmin; j < jmax; j++){ 460 vj = aj[j]; /* block col. index */ 461 rtmp_ptr = rtmp + vj*bs2; 462 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 463 } 464 465 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 466 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 467 i = jl[k]; /* first row to be added to k_th row */ 468 469 while (i < k){ 470 nexti = jl[i]; /* next row to be added to k_th row */ 471 472 /* compute multiplier */ 473 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 474 475 /* uik = -inv(Di)*U_bar(i,k) */ 476 diag = ba + i*bs2; 477 u = ba + ili*bs2; 478 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 479 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 480 481 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 482 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 483 484 /* update -U(i,k) */ 485 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 486 487 /* add multiple of row i to k-th row ... */ 488 jmin = ili + 1; jmax = bi[i+1]; 489 if (jmin < jmax){ 490 for (j=jmin; j<jmax; j++) { 491 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 492 rtmp_ptr = rtmp + bj[j]*bs2; 493 u = ba + j*bs2; 494 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 495 } 496 497 /* ... add i to row list for next nonzero entry */ 498 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 499 j = bj[jmin]; 500 jl[i] = jl[j]; jl[j] = i; /* update jl */ 501 } 502 i = nexti; 503 } 504 505 /* save nonzero entries in k-th row of U ... */ 506 507 /* invert diagonal block */ 508 diag = ba+k*bs2; 509 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 510 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 511 512 jmin = bi[k]; jmax = bi[k+1]; 513 if (jmin < jmax) { 514 for (j=jmin; j<jmax; j++){ 515 vj = bj[j]; /* block col. index of U */ 516 u = ba + j*bs2; 517 rtmp_ptr = rtmp + vj*bs2; 518 for (k1=0; k1<bs2; k1++){ 519 *u++ = *rtmp_ptr; 520 *rtmp_ptr++ = 0.0; 521 } 522 } 523 524 /* ... add k to row list for first nonzero entry in k-th row */ 525 il[k] = jmin; 526 i = bj[jmin]; 527 jl[k] = jl[i]; jl[i] = k; 528 } 529 } 530 531 ierr = PetscFree(rtmp);CHKERRQ(ierr); 532 ierr = PetscFree(il);CHKERRQ(ierr); 533 ierr = PetscFree(dk);CHKERRQ(ierr); 534 ierr = PetscFree(pivots);CHKERRQ(ierr); 535 536 C->factor = FACTOR_CHOLESKY; 537 C->assembled = PETSC_TRUE; 538 C->preallocated = PETSC_TRUE; 539 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 540 PetscFunctionReturn(0); 541 } 542 543 /* 544 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 545 Version for blocks 2 by 2. 546 */ 547 #undef __FUNCT__ 548 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 549 int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B) 550 { 551 Mat C = *B; 552 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 553 IS perm = b->row; 554 int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 555 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 556 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 557 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 558 559 PetscFunctionBegin; 560 561 /* initialization */ 562 /* il and jl record the first nonzero element in each row of the accessing 563 window U(0:k, k:mbs-1). 564 jl: list of rows to be added to uneliminated rows 565 i>= k: jl(i) is the first row to be added to row i 566 i< k: jl(i) is the row following row i in some list of rows 567 jl(i) = mbs indicates the end of a list 568 il(i): points to the first nonzero element in columns k,...,mbs-1 of 569 row i of U */ 570 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 571 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 572 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 573 jl = il + mbs; 574 for (i=0; i<mbs; i++) { 575 jl[i] = mbs; il[0] = 0; 576 } 577 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 578 uik = dk + 4; 579 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 580 581 /* check permutation */ 582 if (!a->permute){ 583 ai = a->i; aj = a->j; aa = a->a; 584 } else { 585 ai = a->inew; aj = a->jnew; 586 ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 587 ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 588 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 589 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 590 591 for (i=0; i<mbs; i++){ 592 jmin = ai[i]; jmax = ai[i+1]; 593 for (j=jmin; j<jmax; j++){ 594 while (a2anew[j] != j){ 595 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 596 for (k1=0; k1<4; k1++){ 597 dk[k1] = aa[k*4+k1]; 598 aa[k*4+k1] = aa[j*4+k1]; 599 aa[j*4+k1] = dk[k1]; 600 } 601 } 602 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 603 if (i > aj[j]){ 604 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 605 ap = aa + j*4; /* ptr to the beginning of the block */ 606 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 607 ap[1] = ap[2]; 608 ap[2] = dk[1]; 609 } 610 } 611 } 612 ierr = PetscFree(a2anew);CHKERRQ(ierr); 613 } 614 615 /* for each row k */ 616 for (k = 0; k<mbs; k++){ 617 618 /*initialize k-th row with elements nonzero in row perm(k) of A */ 619 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 620 ap = aa + jmin*4; 621 for (j = jmin; j < jmax; j++){ 622 vj = perm_ptr[aj[j]]; /* block col. index */ 623 rtmp_ptr = rtmp + vj*4; 624 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 625 } 626 627 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 628 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 629 i = jl[k]; /* first row to be added to k_th row */ 630 631 while (i < k){ 632 nexti = jl[i]; /* next row to be added to k_th row */ 633 634 /* compute multiplier */ 635 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 636 637 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 638 diag = ba + i*4; 639 u = ba + ili*4; 640 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 641 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 642 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 643 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 644 645 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 646 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 647 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 648 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 649 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 650 651 /* update -U(i,k): ba[ili] = uik */ 652 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 653 654 /* add multiple of row i to k-th row ... */ 655 jmin = ili + 1; jmax = bi[i+1]; 656 if (jmin < jmax){ 657 for (j=jmin; j<jmax; j++) { 658 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 659 rtmp_ptr = rtmp + bj[j]*4; 660 u = ba + j*4; 661 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 662 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 663 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 664 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 665 } 666 667 /* ... add i to row list for next nonzero entry */ 668 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 669 j = bj[jmin]; 670 jl[i] = jl[j]; jl[j] = i; /* update jl */ 671 } 672 i = nexti; 673 } 674 675 /* save nonzero entries in k-th row of U ... */ 676 677 /* invert diagonal block */ 678 diag = ba+k*4; 679 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 680 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 681 682 jmin = bi[k]; jmax = bi[k+1]; 683 if (jmin < jmax) { 684 for (j=jmin; j<jmax; j++){ 685 vj = bj[j]; /* block col. index of U */ 686 u = ba + j*4; 687 rtmp_ptr = rtmp + vj*4; 688 for (k1=0; k1<4; k1++){ 689 *u++ = *rtmp_ptr; 690 *rtmp_ptr++ = 0.0; 691 } 692 } 693 694 /* ... add k to row list for first nonzero entry in k-th row */ 695 il[k] = jmin; 696 i = bj[jmin]; 697 jl[k] = jl[i]; jl[i] = k; 698 } 699 } 700 701 ierr = PetscFree(rtmp);CHKERRQ(ierr); 702 ierr = PetscFree(il);CHKERRQ(ierr); 703 ierr = PetscFree(dk);CHKERRQ(ierr); 704 if (a->permute) { 705 ierr = PetscFree(aa);CHKERRQ(ierr); 706 } 707 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 708 C->factor = FACTOR_CHOLESKY; 709 C->assembled = PETSC_TRUE; 710 C->preallocated = PETSC_TRUE; 711 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 712 PetscFunctionReturn(0); 713 } 714 715 /* 716 Version for when blocks are 2 by 2 Using natural ordering 717 */ 718 #undef __FUNCT__ 719 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 720 int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B) 721 { 722 Mat C = *B; 723 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 724 int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 725 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 726 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 727 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 728 729 PetscFunctionBegin; 730 731 /* initialization */ 732 /* il and jl record the first nonzero element in each row of the accessing 733 window U(0:k, k:mbs-1). 734 jl: list of rows to be added to uneliminated rows 735 i>= k: jl(i) is the first row to be added to row i 736 i< k: jl(i) is the row following row i in some list of rows 737 jl(i) = mbs indicates the end of a list 738 il(i): points to the first nonzero element in columns k,...,mbs-1 of 739 row i of U */ 740 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 741 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 742 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 743 jl = il + mbs; 744 for (i=0; i<mbs; i++) { 745 jl[i] = mbs; il[0] = 0; 746 } 747 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 748 uik = dk + 4; 749 750 ai = a->i; aj = a->j; aa = a->a; 751 752 /* for each row k */ 753 for (k = 0; k<mbs; k++){ 754 755 /*initialize k-th row with elements nonzero in row k of A */ 756 jmin = ai[k]; jmax = ai[k+1]; 757 ap = aa + jmin*4; 758 for (j = jmin; j < jmax; j++){ 759 vj = aj[j]; /* block col. index */ 760 rtmp_ptr = rtmp + vj*4; 761 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 762 } 763 764 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 765 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 766 i = jl[k]; /* first row to be added to k_th row */ 767 768 while (i < k){ 769 nexti = jl[i]; /* next row to be added to k_th row */ 770 771 /* compute multiplier */ 772 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 773 774 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 775 diag = ba + i*4; 776 u = ba + ili*4; 777 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 778 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 779 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 780 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 781 782 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 783 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 784 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 785 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 786 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 787 788 /* update -U(i,k): ba[ili] = uik */ 789 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 790 791 /* add multiple of row i to k-th row ... */ 792 jmin = ili + 1; jmax = bi[i+1]; 793 if (jmin < jmax){ 794 for (j=jmin; j<jmax; j++) { 795 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 796 rtmp_ptr = rtmp + bj[j]*4; 797 u = ba + j*4; 798 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 799 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 800 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 801 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 802 } 803 804 /* ... add i to row list for next nonzero entry */ 805 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 806 j = bj[jmin]; 807 jl[i] = jl[j]; jl[j] = i; /* update jl */ 808 } 809 i = nexti; 810 } 811 812 /* save nonzero entries in k-th row of U ... */ 813 814 /* invert diagonal block */ 815 diag = ba+k*4; 816 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 817 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 818 819 jmin = bi[k]; jmax = bi[k+1]; 820 if (jmin < jmax) { 821 for (j=jmin; j<jmax; j++){ 822 vj = bj[j]; /* block col. index of U */ 823 u = ba + j*4; 824 rtmp_ptr = rtmp + vj*4; 825 for (k1=0; k1<4; k1++){ 826 *u++ = *rtmp_ptr; 827 *rtmp_ptr++ = 0.0; 828 } 829 } 830 831 /* ... add k to row list for first nonzero entry in k-th row */ 832 il[k] = jmin; 833 i = bj[jmin]; 834 jl[k] = jl[i]; jl[i] = k; 835 } 836 } 837 838 ierr = PetscFree(rtmp);CHKERRQ(ierr); 839 ierr = PetscFree(il);CHKERRQ(ierr); 840 ierr = PetscFree(dk);CHKERRQ(ierr); 841 842 C->factor = FACTOR_CHOLESKY; 843 C->assembled = PETSC_TRUE; 844 C->preallocated = PETSC_TRUE; 845 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 846 PetscFunctionReturn(0); 847 } 848 849 /* 850 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 851 Version for blocks are 1 by 1. 852 */ 853 #undef __FUNCT__ 854 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1" 855 int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B) 856 { 857 Mat C = *B; 858 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 859 IS ip = b->row; 860 int *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 861 int *ai,*aj,*r; 862 int k,jmin,jmax,*jl,*il,vj,nexti,ili; 863 MatScalar *rtmp; 864 MatScalar *ba = b->a,*aa,ak; 865 MatScalar dk,uikdi; 866 867 PetscFunctionBegin; 868 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 869 if (!a->permute){ 870 ai = a->i; aj = a->j; aa = a->a; 871 } else { 872 ai = a->inew; aj = a->jnew; 873 ierr = PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 874 ierr = PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 875 ierr = PetscMalloc(ai[mbs]*sizeof(int),&r);CHKERRQ(ierr); 876 ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 877 878 jmin = ai[0]; jmax = ai[mbs]; 879 for (j=jmin; j<jmax; j++){ 880 while (r[j] != j){ 881 k = r[j]; r[j] = r[k]; r[k] = k; 882 ak = aa[k]; aa[k] = aa[j]; aa[j] = ak; 883 } 884 } 885 ierr = PetscFree(r);CHKERRQ(ierr); 886 } 887 888 /* initialization */ 889 /* il and jl record the first nonzero element in each row of the accessing 890 window U(0:k, k:mbs-1). 891 jl: list of rows to be added to uneliminated rows 892 i>= k: jl(i) is the first row to be added to row i 893 i< k: jl(i) is the row following row i in some list of rows 894 jl(i) = mbs indicates the end of a list 895 il(i): points to the first nonzero element in columns k,...,mbs-1 of 896 row i of U */ 897 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 898 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 899 jl = il + mbs; 900 for (i=0; i<mbs; i++) { 901 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 902 } 903 904 /* for each row k */ 905 for (k = 0; k<mbs; k++){ 906 907 /*initialize k-th row with elements nonzero in row perm(k) of A */ 908 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 909 910 for (j = jmin; j < jmax; j++){ 911 vj = rip[aj[j]]; 912 rtmp[vj] = aa[j]; 913 } 914 915 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 916 dk = rtmp[k]; 917 i = jl[k]; /* first row to be added to k_th row */ 918 919 while (i < k){ 920 nexti = jl[i]; /* next row to be added to k_th row */ 921 922 /* compute multiplier, update D(k) and U(i,k) */ 923 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 924 uikdi = - ba[ili]*ba[i]; 925 dk += uikdi*ba[ili]; 926 ba[ili] = uikdi; /* -U(i,k) */ 927 928 /* add multiple of row i to k-th row ... */ 929 jmin = ili + 1; jmax = bi[i+1]; 930 if (jmin < jmax){ 931 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 932 /* ... add i to row list for next nonzero entry */ 933 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 934 j = bj[jmin]; 935 jl[i] = jl[j]; jl[j] = i; /* update jl */ 936 } 937 i = nexti; 938 } 939 940 /* check for zero pivot and save diagoanl element */ 941 if (dk == 0.0){ 942 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 943 /* 944 } else if (PetscRealPart(dk) < 0.0){ 945 SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk); 946 */ 947 } 948 949 /* save nonzero entries in k-th row of U ... */ 950 ba[k] = 1.0/dk; 951 jmin = bi[k]; jmax = bi[k+1]; 952 if (jmin < jmax) { 953 for (j=jmin; j<jmax; j++){ 954 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 955 } 956 /* ... add k to row list for first nonzero entry in k-th row */ 957 il[k] = jmin; 958 i = bj[jmin]; 959 jl[k] = jl[i]; jl[i] = k; 960 } 961 } 962 963 ierr = PetscFree(rtmp);CHKERRQ(ierr); 964 ierr = PetscFree(il);CHKERRQ(ierr); 965 if (a->permute){ 966 ierr = PetscFree(aa);CHKERRQ(ierr); 967 } 968 969 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 970 C->factor = FACTOR_CHOLESKY; 971 C->assembled = PETSC_TRUE; 972 C->preallocated = PETSC_TRUE; 973 PetscLogFlops(b->mbs); 974 PetscFunctionReturn(0); 975 } 976 977 /* 978 Version for when blocks are 1 by 1 Using natural ordering 979 */ 980 #undef __FUNCT__ 981 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 982 int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B) 983 { 984 Mat C = *B; 985 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 986 int ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 987 int *ai,*aj; 988 int k,jmin,jmax,*jl,*il,vj,nexti,ili; 989 MatScalar *rtmp,*ba = b->a,*aa,dk,uikdi; 990 991 PetscFunctionBegin; 992 993 /* initialization */ 994 /* il and jl record the first nonzero element in each row of the accessing 995 window U(0:k, k:mbs-1). 996 jl: list of rows to be added to uneliminated rows 997 i>= k: jl(i) is the first row to be added to row i 998 i< k: jl(i) is the row following row i in some list of rows 999 jl(i) = mbs indicates the end of a list 1000 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1001 row i of U */ 1002 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1003 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 1004 jl = il + mbs; 1005 for (i=0; i<mbs; i++) { 1006 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1007 } 1008 1009 ai = a->i; aj = a->j; aa = a->a; 1010 1011 /* for each row k */ 1012 for (k = 0; k<mbs; k++){ 1013 1014 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1015 jmin = ai[k]; jmax = ai[k+1]; 1016 1017 for (j = jmin; j < jmax; j++){ 1018 vj = aj[j]; 1019 rtmp[vj] = aa[j]; 1020 } 1021 1022 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1023 dk = rtmp[k]; 1024 i = jl[k]; /* first row to be added to k_th row */ 1025 1026 while (i < k){ 1027 nexti = jl[i]; /* next row to be added to k_th row */ 1028 1029 /* compute multiplier, update D(k) and U(i,k) */ 1030 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1031 uikdi = - ba[ili]*ba[i]; 1032 dk += uikdi*ba[ili]; 1033 ba[ili] = uikdi; /* -U(i,k) */ 1034 1035 /* add multiple of row i to k-th row ... */ 1036 jmin = ili + 1; jmax = bi[i+1]; 1037 if (jmin < jmax){ 1038 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1039 /* ... add i to row list for next nonzero entry */ 1040 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1041 j = bj[jmin]; 1042 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1043 } 1044 i = nexti; 1045 } 1046 1047 /* check for zero pivot and save diagoanl element */ 1048 if (dk == 0.0){ 1049 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 1050 /* 1051 } else if (PetscRealPart(dk) < 0){ 1052 SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk); 1053 */ 1054 } 1055 1056 /* save nonzero entries in k-th row of U ... */ 1057 ba[k] = 1.0/dk; 1058 jmin = bi[k]; jmax = bi[k+1]; 1059 if (jmin < jmax) { 1060 for (j=jmin; j<jmax; j++){ 1061 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 1062 } 1063 /* ... add k to row list for first nonzero entry in k-th row */ 1064 il[k] = jmin; 1065 i = bj[jmin]; 1066 jl[k] = jl[i]; jl[i] = k; 1067 } 1068 } 1069 1070 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1071 ierr = PetscFree(il);CHKERRQ(ierr); 1072 1073 C->factor = FACTOR_CHOLESKY; 1074 C->assembled = PETSC_TRUE; 1075 C->preallocated = PETSC_TRUE; 1076 PetscLogFlops(b->mbs); 1077 PetscFunctionReturn(0); 1078 } 1079 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ" 1082 int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,PetscReal f) 1083 { 1084 int ierr; 1085 Mat C; 1086 1087 PetscFunctionBegin; 1088 ierr = MatCholeskyFactorSymbolic(A,perm,f,&C);CHKERRQ(ierr); 1089 ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr); 1090 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 1095