1 /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/seq/baij.h" 4 #include "src/mat/impls/sbaij/seq/sbaij.h" 5 #include "src/vec/vecimpl.h" 6 #include "src/inline/ilu.h" 7 #include "include/petscis.h" 8 9 #if !defined(PETSC_USE_COMPLEX) 10 /* 11 input: 12 F -- numeric factor 13 output: 14 nneg, nzero, npos: matrix inertia 15 */ 16 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatGetInertia_SeqSBAIJ" 19 int MatGetInertia_SeqSBAIJ(Mat F,int *nneig,int *nzero,int *npos) 20 { 21 Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data; 22 PetscScalar *dd = fact_ptr->a; 23 int m = F->m,i; 24 25 PetscFunctionBegin; 26 if (nneig){ 27 *nneig = 0; 28 for (i=0; i<m; i++){ 29 if (PetscRealPart(dd[i]) < 0.0) (*nneig)++; 30 } 31 } 32 if (nzero){ 33 *nzero = 0; 34 for (i=0; i<m; i++){ 35 if (PetscRealPart(dd[i]) == 0.0) (*nzero)++; 36 } 37 } 38 if (npos){ 39 *npos = 0; 40 for (i=0; i<m; i++){ 41 if (PetscRealPart(dd[i]) > 0.0) (*npos)++; 42 } 43 } 44 PetscFunctionReturn(0); 45 } 46 #endif /* !defined(PETSC_USE_COMPLEX) */ 47 48 /* Using Modified Sparse Row (MSR) storage. 49 See page 85, "Iterative Methods ..." by Saad. */ 50 /* 51 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 52 */ 53 /* Use Modified Sparse Row storage for u and ju, see Sasd pp.85 */ 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 56 int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,PetscReal f,Mat *B) 57 { 58 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 59 int *rip,ierr,i,mbs = a->mbs,*ai,*aj; 60 int *jutmp,bs = a->bs,bs2=a->bs2; 61 int m,realloc = 0,prow; 62 int *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 63 PetscTruth perm_identity; 64 65 PetscFunctionBegin; 66 67 /* check whether perm is the identity mapping */ 68 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 69 if (!perm_identity) a->permute = PETSC_TRUE; 70 71 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 72 73 if (perm_identity){ /* without permutation */ 74 ai = a->i; aj = a->j; 75 } else { /* non-trivial permutation */ 76 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 77 ai = a->inew; aj = a->jnew; 78 } 79 80 /* initialization */ 81 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 82 umax = (int)(f*ai[mbs] + 1); umax += mbs + 1; 83 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 84 iu[0] = mbs+1; 85 juidx = mbs + 1; /* index for ju */ 86 ierr = PetscMalloc(2*mbs*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for pivot row */ 87 q = jl + mbs; /* linked list for col index */ 88 for (i=0; i<mbs; i++){ 89 jl[i] = mbs; 90 q[i] = 0; 91 } 92 93 /* for each row k */ 94 for (k=0; k<mbs; k++){ 95 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 96 q[k] = mbs; 97 /* initialize nonzero structure of k-th row to row rip[k] of A */ 98 jmin = ai[rip[k]]; 99 jmax = ai[rip[k]+1]; 100 for (j=jmin; j<jmax; j++){ 101 vj = rip[aj[j]]; /* col. value */ 102 if(vj > k){ 103 qm = k; 104 do { 105 m = qm; qm = q[m]; 106 } while(qm < vj); 107 if (qm == vj) { 108 SETERRQ(1," error: duplicate entry in A\n"); 109 } 110 nzk++; 111 q[m] = vj; 112 q[vj] = qm; 113 } /* if(vj > k) */ 114 } /* for (j=jmin; j<jmax; j++) */ 115 116 /* modify nonzero structure of k-th row by computing fill-in 117 for each row i to be merged in */ 118 prow = k; 119 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 120 121 while (prow < k){ 122 /* merge row prow into k-th row */ 123 jmin = iu[prow] + 1; jmax = iu[prow+1]; 124 qm = k; 125 for (j=jmin; j<jmax; j++){ 126 vj = ju[j]; 127 do { 128 m = qm; qm = q[m]; 129 } while (qm < vj); 130 if (qm != vj){ 131 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 132 } 133 } 134 prow = jl[prow]; /* next pivot row */ 135 } 136 137 /* add k to row list for first nonzero element in k-th row */ 138 if (nzk > 0){ 139 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 140 jl[k] = jl[i]; jl[i] = k; 141 } 142 iu[k+1] = iu[k] + nzk; 143 144 /* allocate more space to ju if needed */ 145 if (iu[k+1] > umax) { 146 /* estimate how much additional space we will need */ 147 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 148 /* just double the memory each time */ 149 maxadd = umax; 150 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 151 umax += maxadd; 152 153 /* allocate a longer ju */ 154 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 155 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 156 ierr = PetscFree(ju);CHKERRQ(ierr); 157 ju = jutmp; 158 realloc++; /* count how many times we realloc */ 159 } 160 161 /* save nonzero structure of k-th row in ju */ 162 i=k; 163 while (nzk --) { 164 i = q[i]; 165 ju[juidx++] = i; 166 } 167 } 168 169 if (ai[mbs] != 0) { 170 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 171 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 172 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 173 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 174 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 175 } else { 176 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 177 } 178 179 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 180 /* ierr = PetscFree(q);CHKERRQ(ierr); */ 181 ierr = PetscFree(jl);CHKERRQ(ierr); 182 183 /* put together the new matrix */ 184 ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr); 185 /* PetscLogObjectParent(*B,iperm); */ 186 b = (Mat_SeqSBAIJ*)(*B)->data; 187 ierr = PetscFree(b->imax);CHKERRQ(ierr); 188 b->singlemalloc = PETSC_FALSE; 189 /* the next line frees the default space generated by the Create() */ 190 ierr = PetscFree(b->a);CHKERRQ(ierr); 191 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 192 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 193 b->j = ju; 194 b->i = iu; 195 b->diag = 0; 196 b->ilen = 0; 197 b->imax = 0; 198 b->row = perm; 199 b->pivotinblocks = PETSC_FALSE; /* need to get from MatCholeskyInfo */ 200 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 201 b->icol = perm; 202 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 203 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 204 /* In b structure: Free imax, ilen, old a, old j. 205 Allocate idnew, solve_work, new a, new j */ 206 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 207 b->s_maxnz = b->s_nz = iu[mbs]; 208 209 (*B)->factor = FACTOR_CHOLESKY; 210 (*B)->info.factor_mallocs = realloc; 211 (*B)->info.fill_ratio_given = f; 212 if (ai[mbs] != 0) { 213 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 214 } else { 215 (*B)->info.fill_ratio_needed = 0.0; 216 } 217 218 if (perm_identity){ 219 switch (bs) { 220 case 1: 221 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 222 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 223 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 224 break; 225 case 2: 226 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 227 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 228 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 229 break; 230 case 3: 231 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 232 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 233 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 234 break; 235 case 4: 236 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 237 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 238 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 239 break; 240 case 5: 241 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 242 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 243 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 244 break; 245 case 6: 246 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 247 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 248 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 249 break; 250 case 7: 251 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 252 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 253 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 254 break; 255 default: 256 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 257 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 258 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 259 break; 260 } 261 } 262 263 PetscFunctionReturn(0); 264 } 265 266 267 #undef __FUNCT__ 268 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 269 int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B) 270 { 271 Mat C = *B; 272 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 273 IS perm = b->row; 274 int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 275 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 276 int bs=a->bs,bs2 = a->bs2; 277 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 278 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 279 MatScalar *work; 280 int *pivots; 281 282 PetscFunctionBegin; 283 284 /* initialization */ 285 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 286 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 287 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 288 jl = il + mbs; 289 for (i=0; i<mbs; i++) { 290 jl[i] = mbs; il[0] = 0; 291 } 292 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 293 uik = dk + bs2; 294 work = uik + bs2; 295 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 296 297 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 298 299 /* check permutation */ 300 if (!a->permute){ 301 ai = a->i; aj = a->j; aa = a->a; 302 } else { 303 ai = a->inew; aj = a->jnew; 304 ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 305 ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 306 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 307 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 308 309 for (i=0; i<mbs; i++){ 310 jmin = ai[i]; jmax = ai[i+1]; 311 for (j=jmin; j<jmax; j++){ 312 while (a2anew[j] != j){ 313 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 314 for (k1=0; k1<bs2; k1++){ 315 dk[k1] = aa[k*bs2+k1]; 316 aa[k*bs2+k1] = aa[j*bs2+k1]; 317 aa[j*bs2+k1] = dk[k1]; 318 } 319 } 320 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 321 if (i > aj[j]){ 322 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 323 ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 324 for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 325 for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 326 for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 327 } 328 } 329 } 330 } 331 ierr = PetscFree(a2anew);CHKERRQ(ierr); 332 } 333 334 /* for each row k */ 335 for (k = 0; k<mbs; k++){ 336 337 /*initialize k-th row with elements nonzero in row perm(k) of A */ 338 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 339 340 ap = aa + jmin*bs2; 341 for (j = jmin; j < jmax; j++){ 342 vj = perm_ptr[aj[j]]; /* block col. index */ 343 rtmp_ptr = rtmp + vj*bs2; 344 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 345 } 346 347 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 348 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 349 i = jl[k]; /* first row to be added to k_th row */ 350 351 while (i < k){ 352 nexti = jl[i]; /* next row to be added to k_th row */ 353 354 /* compute multiplier */ 355 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 356 357 /* uik = -inv(Di)*U_bar(i,k) */ 358 diag = ba + i*bs2; 359 u = ba + ili*bs2; 360 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 361 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 362 363 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 364 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 365 366 /* update -U(i,k) */ 367 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 368 369 /* add multiple of row i to k-th row ... */ 370 jmin = ili + 1; jmax = bi[i+1]; 371 if (jmin < jmax){ 372 for (j=jmin; j<jmax; j++) { 373 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 374 rtmp_ptr = rtmp + bj[j]*bs2; 375 u = ba + j*bs2; 376 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 377 } 378 379 /* ... add i to row list for next nonzero entry */ 380 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 381 j = bj[jmin]; 382 jl[i] = jl[j]; jl[j] = i; /* update jl */ 383 } 384 i = nexti; 385 } 386 387 /* save nonzero entries in k-th row of U ... */ 388 389 /* invert diagonal block */ 390 diag = ba+k*bs2; 391 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 392 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 393 394 jmin = bi[k]; jmax = bi[k+1]; 395 if (jmin < jmax) { 396 for (j=jmin; j<jmax; j++){ 397 vj = bj[j]; /* block col. index of U */ 398 u = ba + j*bs2; 399 rtmp_ptr = rtmp + vj*bs2; 400 for (k1=0; k1<bs2; k1++){ 401 *u++ = *rtmp_ptr; 402 *rtmp_ptr++ = 0.0; 403 } 404 } 405 406 /* ... add k to row list for first nonzero entry in k-th row */ 407 il[k] = jmin; 408 i = bj[jmin]; 409 jl[k] = jl[i]; jl[i] = k; 410 } 411 } 412 413 ierr = PetscFree(rtmp);CHKERRQ(ierr); 414 ierr = PetscFree(il);CHKERRQ(ierr); 415 ierr = PetscFree(dk);CHKERRQ(ierr); 416 ierr = PetscFree(pivots);CHKERRQ(ierr); 417 if (a->permute){ 418 ierr = PetscFree(aa);CHKERRQ(ierr); 419 } 420 421 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 422 C->factor = FACTOR_CHOLESKY; 423 C->assembled = PETSC_TRUE; 424 C->preallocated = PETSC_TRUE; 425 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 431 int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B) 432 { 433 Mat C = *B; 434 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 435 int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 436 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 437 int bs=a->bs,bs2 = a->bs2; 438 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 439 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 440 MatScalar *work; 441 int *pivots; 442 443 PetscFunctionBegin; 444 445 /* initialization */ 446 447 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 448 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 449 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 450 jl = il + mbs; 451 for (i=0; i<mbs; i++) { 452 jl[i] = mbs; il[0] = 0; 453 } 454 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 455 uik = dk + bs2; 456 work = uik + bs2; 457 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 458 459 ai = a->i; aj = a->j; aa = a->a; 460 461 /* for each row k */ 462 for (k = 0; k<mbs; k++){ 463 464 /*initialize k-th row with elements nonzero in row k of A */ 465 jmin = ai[k]; jmax = ai[k+1]; 466 ap = aa + jmin*bs2; 467 for (j = jmin; j < jmax; j++){ 468 vj = aj[j]; /* block col. index */ 469 rtmp_ptr = rtmp + vj*bs2; 470 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 471 } 472 473 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 474 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 475 i = jl[k]; /* first row to be added to k_th row */ 476 477 while (i < k){ 478 nexti = jl[i]; /* next row to be added to k_th row */ 479 480 /* compute multiplier */ 481 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 482 483 /* uik = -inv(Di)*U_bar(i,k) */ 484 diag = ba + i*bs2; 485 u = ba + ili*bs2; 486 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 487 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 488 489 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 490 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 491 492 /* update -U(i,k) */ 493 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 494 495 /* add multiple of row i to k-th row ... */ 496 jmin = ili + 1; jmax = bi[i+1]; 497 if (jmin < jmax){ 498 for (j=jmin; j<jmax; j++) { 499 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 500 rtmp_ptr = rtmp + bj[j]*bs2; 501 u = ba + j*bs2; 502 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 503 } 504 505 /* ... add i to row list for next nonzero entry */ 506 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 507 j = bj[jmin]; 508 jl[i] = jl[j]; jl[j] = i; /* update jl */ 509 } 510 i = nexti; 511 } 512 513 /* save nonzero entries in k-th row of U ... */ 514 515 /* invert diagonal block */ 516 diag = ba+k*bs2; 517 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 518 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 519 520 jmin = bi[k]; jmax = bi[k+1]; 521 if (jmin < jmax) { 522 for (j=jmin; j<jmax; j++){ 523 vj = bj[j]; /* block col. index of U */ 524 u = ba + j*bs2; 525 rtmp_ptr = rtmp + vj*bs2; 526 for (k1=0; k1<bs2; k1++){ 527 *u++ = *rtmp_ptr; 528 *rtmp_ptr++ = 0.0; 529 } 530 } 531 532 /* ... add k to row list for first nonzero entry in k-th row */ 533 il[k] = jmin; 534 i = bj[jmin]; 535 jl[k] = jl[i]; jl[i] = k; 536 } 537 } 538 539 ierr = PetscFree(rtmp);CHKERRQ(ierr); 540 ierr = PetscFree(il);CHKERRQ(ierr); 541 ierr = PetscFree(dk);CHKERRQ(ierr); 542 ierr = PetscFree(pivots);CHKERRQ(ierr); 543 544 C->factor = FACTOR_CHOLESKY; 545 C->assembled = PETSC_TRUE; 546 C->preallocated = PETSC_TRUE; 547 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 548 PetscFunctionReturn(0); 549 } 550 551 /* 552 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 553 Version for blocks 2 by 2. 554 */ 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 557 int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B) 558 { 559 Mat C = *B; 560 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 561 IS perm = b->row; 562 int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 563 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 564 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 565 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 566 567 PetscFunctionBegin; 568 569 /* initialization */ 570 /* il and jl record the first nonzero element in each row of the accessing 571 window U(0:k, k:mbs-1). 572 jl: list of rows to be added to uneliminated rows 573 i>= k: jl(i) is the first row to be added to row i 574 i< k: jl(i) is the row following row i in some list of rows 575 jl(i) = mbs indicates the end of a list 576 il(i): points to the first nonzero element in columns k,...,mbs-1 of 577 row i of U */ 578 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 579 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 580 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 581 jl = il + mbs; 582 for (i=0; i<mbs; i++) { 583 jl[i] = mbs; il[0] = 0; 584 } 585 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 586 uik = dk + 4; 587 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 588 589 /* check permutation */ 590 if (!a->permute){ 591 ai = a->i; aj = a->j; aa = a->a; 592 } else { 593 ai = a->inew; aj = a->jnew; 594 ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 595 ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 596 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 597 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 598 599 for (i=0; i<mbs; i++){ 600 jmin = ai[i]; jmax = ai[i+1]; 601 for (j=jmin; j<jmax; j++){ 602 while (a2anew[j] != j){ 603 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 604 for (k1=0; k1<4; k1++){ 605 dk[k1] = aa[k*4+k1]; 606 aa[k*4+k1] = aa[j*4+k1]; 607 aa[j*4+k1] = dk[k1]; 608 } 609 } 610 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 611 if (i > aj[j]){ 612 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 613 ap = aa + j*4; /* ptr to the beginning of the block */ 614 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 615 ap[1] = ap[2]; 616 ap[2] = dk[1]; 617 } 618 } 619 } 620 ierr = PetscFree(a2anew);CHKERRQ(ierr); 621 } 622 623 /* for each row k */ 624 for (k = 0; k<mbs; k++){ 625 626 /*initialize k-th row with elements nonzero in row perm(k) of A */ 627 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 628 ap = aa + jmin*4; 629 for (j = jmin; j < jmax; j++){ 630 vj = perm_ptr[aj[j]]; /* block col. index */ 631 rtmp_ptr = rtmp + vj*4; 632 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 633 } 634 635 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 636 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 637 i = jl[k]; /* first row to be added to k_th row */ 638 639 while (i < k){ 640 nexti = jl[i]; /* next row to be added to k_th row */ 641 642 /* compute multiplier */ 643 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 644 645 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 646 diag = ba + i*4; 647 u = ba + ili*4; 648 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 649 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 650 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 651 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 652 653 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 654 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 655 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 656 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 657 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 658 659 /* update -U(i,k): ba[ili] = uik */ 660 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 661 662 /* add multiple of row i to k-th row ... */ 663 jmin = ili + 1; jmax = bi[i+1]; 664 if (jmin < jmax){ 665 for (j=jmin; j<jmax; j++) { 666 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 667 rtmp_ptr = rtmp + bj[j]*4; 668 u = ba + j*4; 669 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 670 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 671 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 672 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 673 } 674 675 /* ... add i to row list for next nonzero entry */ 676 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 677 j = bj[jmin]; 678 jl[i] = jl[j]; jl[j] = i; /* update jl */ 679 } 680 i = nexti; 681 } 682 683 /* save nonzero entries in k-th row of U ... */ 684 685 /* invert diagonal block */ 686 diag = ba+k*4; 687 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 688 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 689 690 jmin = bi[k]; jmax = bi[k+1]; 691 if (jmin < jmax) { 692 for (j=jmin; j<jmax; j++){ 693 vj = bj[j]; /* block col. index of U */ 694 u = ba + j*4; 695 rtmp_ptr = rtmp + vj*4; 696 for (k1=0; k1<4; k1++){ 697 *u++ = *rtmp_ptr; 698 *rtmp_ptr++ = 0.0; 699 } 700 } 701 702 /* ... add k to row list for first nonzero entry in k-th row */ 703 il[k] = jmin; 704 i = bj[jmin]; 705 jl[k] = jl[i]; jl[i] = k; 706 } 707 } 708 709 ierr = PetscFree(rtmp);CHKERRQ(ierr); 710 ierr = PetscFree(il);CHKERRQ(ierr); 711 ierr = PetscFree(dk);CHKERRQ(ierr); 712 if (a->permute) { 713 ierr = PetscFree(aa);CHKERRQ(ierr); 714 } 715 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 716 C->factor = FACTOR_CHOLESKY; 717 C->assembled = PETSC_TRUE; 718 C->preallocated = PETSC_TRUE; 719 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 720 PetscFunctionReturn(0); 721 } 722 723 /* 724 Version for when blocks are 2 by 2 Using natural ordering 725 */ 726 #undef __FUNCT__ 727 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 728 int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B) 729 { 730 Mat C = *B; 731 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 732 int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 733 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 734 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 735 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 736 737 PetscFunctionBegin; 738 739 /* initialization */ 740 /* il and jl record the first nonzero element in each row of the accessing 741 window U(0:k, k:mbs-1). 742 jl: list of rows to be added to uneliminated rows 743 i>= k: jl(i) is the first row to be added to row i 744 i< k: jl(i) is the row following row i in some list of rows 745 jl(i) = mbs indicates the end of a list 746 il(i): points to the first nonzero element in columns k,...,mbs-1 of 747 row i of U */ 748 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 749 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 750 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 751 jl = il + mbs; 752 for (i=0; i<mbs; i++) { 753 jl[i] = mbs; il[0] = 0; 754 } 755 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 756 uik = dk + 4; 757 758 ai = a->i; aj = a->j; aa = a->a; 759 760 /* for each row k */ 761 for (k = 0; k<mbs; k++){ 762 763 /*initialize k-th row with elements nonzero in row k of A */ 764 jmin = ai[k]; jmax = ai[k+1]; 765 ap = aa + jmin*4; 766 for (j = jmin; j < jmax; j++){ 767 vj = aj[j]; /* block col. index */ 768 rtmp_ptr = rtmp + vj*4; 769 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 770 } 771 772 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 773 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 774 i = jl[k]; /* first row to be added to k_th row */ 775 776 while (i < k){ 777 nexti = jl[i]; /* next row to be added to k_th row */ 778 779 /* compute multiplier */ 780 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 781 782 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 783 diag = ba + i*4; 784 u = ba + ili*4; 785 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 786 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 787 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 788 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 789 790 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 791 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 792 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 793 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 794 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 795 796 /* update -U(i,k): ba[ili] = uik */ 797 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 798 799 /* add multiple of row i to k-th row ... */ 800 jmin = ili + 1; jmax = bi[i+1]; 801 if (jmin < jmax){ 802 for (j=jmin; j<jmax; j++) { 803 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 804 rtmp_ptr = rtmp + bj[j]*4; 805 u = ba + j*4; 806 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 807 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 808 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 809 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 810 } 811 812 /* ... add i to row list for next nonzero entry */ 813 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 814 j = bj[jmin]; 815 jl[i] = jl[j]; jl[j] = i; /* update jl */ 816 } 817 i = nexti; 818 } 819 820 /* save nonzero entries in k-th row of U ... */ 821 822 /* invert diagonal block */ 823 diag = ba+k*4; 824 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 825 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 826 827 jmin = bi[k]; jmax = bi[k+1]; 828 if (jmin < jmax) { 829 for (j=jmin; j<jmax; j++){ 830 vj = bj[j]; /* block col. index of U */ 831 u = ba + j*4; 832 rtmp_ptr = rtmp + vj*4; 833 for (k1=0; k1<4; k1++){ 834 *u++ = *rtmp_ptr; 835 *rtmp_ptr++ = 0.0; 836 } 837 } 838 839 /* ... add k to row list for first nonzero entry in k-th row */ 840 il[k] = jmin; 841 i = bj[jmin]; 842 jl[k] = jl[i]; jl[i] = k; 843 } 844 } 845 846 ierr = PetscFree(rtmp);CHKERRQ(ierr); 847 ierr = PetscFree(il);CHKERRQ(ierr); 848 ierr = PetscFree(dk);CHKERRQ(ierr); 849 850 C->factor = FACTOR_CHOLESKY; 851 C->assembled = PETSC_TRUE; 852 C->preallocated = PETSC_TRUE; 853 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 854 PetscFunctionReturn(0); 855 } 856 857 /* 858 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 859 Version for blocks are 1 by 1. 860 */ 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1" 863 int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B) 864 { 865 Mat C = *B; 866 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 867 IS ip = b->row; 868 int *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 869 int *ai,*aj,*r; 870 int k,jmin,jmax,*jl,*il,vj,nexti,ili; 871 MatScalar *rtmp; 872 MatScalar *ba = b->a,*aa,ak; 873 MatScalar dk,uikdi; 874 875 PetscFunctionBegin; 876 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 877 if (!a->permute){ 878 ai = a->i; aj = a->j; aa = a->a; 879 } else { 880 ai = a->inew; aj = a->jnew; 881 ierr = PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 882 ierr = PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 883 ierr = PetscMalloc(ai[mbs]*sizeof(int),&r);CHKERRQ(ierr); 884 ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 885 886 jmin = ai[0]; jmax = ai[mbs]; 887 for (j=jmin; j<jmax; j++){ 888 while (r[j] != j){ 889 k = r[j]; r[j] = r[k]; r[k] = k; 890 ak = aa[k]; aa[k] = aa[j]; aa[j] = ak; 891 } 892 } 893 ierr = PetscFree(r);CHKERRQ(ierr); 894 } 895 896 /* initialization */ 897 /* il and jl record the first nonzero element in each row of the accessing 898 window U(0:k, k:mbs-1). 899 jl: list of rows to be added to uneliminated rows 900 i>= k: jl(i) is the first row to be added to row i 901 i< k: jl(i) is the row following row i in some list of rows 902 jl(i) = mbs indicates the end of a list 903 il(i): points to the first nonzero element in columns k,...,mbs-1 of 904 row i of U */ 905 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 906 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 907 jl = il + mbs; 908 for (i=0; i<mbs; i++) { 909 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 910 } 911 912 /* for each row k */ 913 for (k = 0; k<mbs; k++){ 914 915 /*initialize k-th row with elements nonzero in row perm(k) of A */ 916 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 917 918 for (j = jmin; j < jmax; j++){ 919 vj = rip[aj[j]]; 920 rtmp[vj] = aa[j]; 921 } 922 923 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 924 dk = rtmp[k]; 925 i = jl[k]; /* first row to be added to k_th row */ 926 927 while (i < k){ 928 nexti = jl[i]; /* next row to be added to k_th row */ 929 930 /* compute multiplier, update D(k) and U(i,k) */ 931 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 932 uikdi = - ba[ili]*ba[i]; 933 dk += uikdi*ba[ili]; 934 ba[ili] = uikdi; /* -U(i,k) */ 935 936 /* add multiple of row i to k-th row ... */ 937 jmin = ili + 1; jmax = bi[i+1]; 938 if (jmin < jmax){ 939 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 940 /* ... add i to row list for next nonzero entry */ 941 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 942 j = bj[jmin]; 943 jl[i] = jl[j]; jl[j] = i; /* update jl */ 944 } 945 i = nexti; 946 } 947 948 /* check for zero pivot and save diagoanl element */ 949 if (dk == 0.0){ 950 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 951 /* 952 } else if (PetscRealPart(dk) < 0.0){ 953 SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk); 954 */ 955 } 956 957 /* save nonzero entries in k-th row of U ... */ 958 ba[k] = 1.0/dk; 959 jmin = bi[k]; jmax = bi[k+1]; 960 if (jmin < jmax) { 961 for (j=jmin; j<jmax; j++){ 962 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 963 } 964 /* ... add k to row list for first nonzero entry in k-th row */ 965 il[k] = jmin; 966 i = bj[jmin]; 967 jl[k] = jl[i]; jl[i] = k; 968 } 969 } 970 971 ierr = PetscFree(rtmp);CHKERRQ(ierr); 972 ierr = PetscFree(il);CHKERRQ(ierr); 973 if (a->permute){ 974 ierr = PetscFree(aa);CHKERRQ(ierr); 975 } 976 977 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 978 C->factor = FACTOR_CHOLESKY; 979 C->assembled = PETSC_TRUE; 980 C->preallocated = PETSC_TRUE; 981 PetscLogFlops(b->mbs); 982 PetscFunctionReturn(0); 983 } 984 985 /* 986 Version for when blocks are 1 by 1 Using natural ordering 987 */ 988 #undef __FUNCT__ 989 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 990 int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B) 991 { 992 Mat C = *B; 993 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 994 int ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 995 int *ai,*aj; 996 int k,jmin,jmax,*jl,*il,vj,nexti,ili; 997 MatScalar *rtmp,*ba = b->a,*aa,dk,uikdi; 998 999 PetscFunctionBegin; 1000 1001 /* initialization */ 1002 /* il and jl record the first nonzero element in each row of the accessing 1003 window U(0:k, k:mbs-1). 1004 jl: list of rows to be added to uneliminated rows 1005 i>= k: jl(i) is the first row to be added to row i 1006 i< k: jl(i) is the row following row i in some list of rows 1007 jl(i) = mbs indicates the end of a list 1008 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1009 row i of U */ 1010 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1011 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 1012 jl = il + mbs; 1013 for (i=0; i<mbs; i++) { 1014 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1015 } 1016 1017 ai = a->i; aj = a->j; aa = a->a; 1018 1019 /* for each row k */ 1020 for (k = 0; k<mbs; k++){ 1021 1022 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1023 jmin = ai[k]; jmax = ai[k+1]; 1024 1025 for (j = jmin; j < jmax; j++){ 1026 vj = aj[j]; 1027 rtmp[vj] = aa[j]; 1028 } 1029 1030 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1031 dk = rtmp[k]; 1032 i = jl[k]; /* first row to be added to k_th row */ 1033 1034 while (i < k){ 1035 nexti = jl[i]; /* next row to be added to k_th row */ 1036 1037 /* compute multiplier, update D(k) and U(i,k) */ 1038 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1039 uikdi = - ba[ili]*ba[i]; 1040 dk += uikdi*ba[ili]; 1041 ba[ili] = uikdi; /* -U(i,k) */ 1042 1043 /* add multiple of row i to k-th row ... */ 1044 jmin = ili + 1; jmax = bi[i+1]; 1045 if (jmin < jmax){ 1046 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1047 /* ... add i to row list for next nonzero entry */ 1048 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1049 j = bj[jmin]; 1050 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1051 } 1052 i = nexti; 1053 } 1054 1055 /* check for zero pivot and save diagoanl element */ 1056 if (dk == 0.0){ 1057 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 1058 /* 1059 } else if (PetscRealPart(dk) < 0){ 1060 SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk); 1061 */ 1062 } 1063 1064 /* save nonzero entries in k-th row of U ... */ 1065 ba[k] = 1.0/dk; 1066 jmin = bi[k]; jmax = bi[k+1]; 1067 if (jmin < jmax) { 1068 for (j=jmin; j<jmax; j++){ 1069 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 1070 } 1071 /* ... add k to row list for first nonzero entry in k-th row */ 1072 il[k] = jmin; 1073 i = bj[jmin]; 1074 jl[k] = jl[i]; jl[i] = k; 1075 } 1076 } 1077 1078 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1079 ierr = PetscFree(il);CHKERRQ(ierr); 1080 1081 C->factor = FACTOR_CHOLESKY; 1082 C->assembled = PETSC_TRUE; 1083 C->preallocated = PETSC_TRUE; 1084 PetscLogFlops(b->mbs); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ" 1090 int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,PetscReal f) 1091 { 1092 int ierr; 1093 Mat C; 1094 1095 PetscFunctionBegin; 1096 ierr = MatCholeskyFactorSymbolic(A,perm,f,&C);CHKERRQ(ierr); 1097 ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr); 1098 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 1103