1 /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/seq/baij.h" 4 #include "src/mat/impls/sbaij/seq/sbaij.h" 5 #include "src/vec/vecimpl.h" 6 #include "src/inline/ilu.h" 7 #include "include/petscis.h" 8 9 #if !defined(PETSC_USE_COMPLEX) 10 /* 11 input: 12 A -- original matrix in SEQSBAIJ format 13 F -- symbolic or numeric factor of A 14 output: 15 F -- numeric factor of A if F is input as symbolic factor of A 16 nneg, nzero, npos: inertia of A 17 */ 18 19 #undef __FUNCT__ 20 #define __FUNCT__ "MatGetInertia_SeqSBAIJ" 21 int MatGetInertia_SeqSBAIJ(Mat A,Mat *F,int *nneig,int *nzero,int *npos) 22 { 23 PetscScalar *dd; 24 Mat_SeqSBAIJ *fact_ptr; 25 int m=A->m,i; 26 27 PetscFunctionBegin; 28 if (! (*F)->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for symfactor yet, use numfactor F"); 29 30 fact_ptr = (Mat_SeqSBAIJ*)(*F)->data; 31 dd = fact_ptr->a; 32 *nneig = 0; 33 for (i=0; i<m; i++){ 34 if (PetscRealPart(dd[i]) < 0.0) (*nneig)++; 35 } 36 37 PetscFunctionReturn(0); 38 } 39 #endif /* !defined(PETSC_USE_COMPLEX) */ 40 41 /* Using Modified Sparse Row (MSR) storage. 42 See page 85, "Iterative Methods ..." by Saad. */ 43 /* 44 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 45 */ 46 /* Use Modified Sparse Row storage for u and ju, see Sasd pp.85 */ 47 #undef __FUNCT__ 48 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 49 int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,PetscReal f,Mat *B) 50 { 51 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 52 int *rip,ierr,i,mbs = a->mbs,*ai,*aj; 53 int *jutmp,bs = a->bs,bs2=a->bs2; 54 int m,realloc = 0,prow; 55 int *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 56 PetscTruth perm_identity; 57 58 PetscFunctionBegin; 59 60 /* check whether perm is the identity mapping */ 61 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 62 if (!perm_identity) a->permute = PETSC_TRUE; 63 64 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 65 66 if (perm_identity){ /* without permutation */ 67 ai = a->i; aj = a->j; 68 } else { /* non-trivial permutation */ 69 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 70 ai = a->inew; aj = a->jnew; 71 } 72 73 /* initialization */ 74 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 75 umax = (int)(f*ai[mbs] + 1); umax += mbs + 1; 76 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 77 iu[0] = mbs+1; 78 juidx = mbs + 1; /* index for ju */ 79 ierr = PetscMalloc(2*mbs*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for pivot row */ 80 q = jl + mbs; /* linked list for col index */ 81 for (i=0; i<mbs; i++){ 82 jl[i] = mbs; 83 q[i] = 0; 84 } 85 86 /* for each row k */ 87 for (k=0; k<mbs; k++){ 88 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 89 q[k] = mbs; 90 /* initialize nonzero structure of k-th row to row rip[k] of A */ 91 jmin = ai[rip[k]]; 92 jmax = ai[rip[k]+1]; 93 for (j=jmin; j<jmax; j++){ 94 vj = rip[aj[j]]; /* col. value */ 95 if(vj > k){ 96 qm = k; 97 do { 98 m = qm; qm = q[m]; 99 } while(qm < vj); 100 if (qm == vj) { 101 SETERRQ(1," error: duplicate entry in A\n"); 102 } 103 nzk++; 104 q[m] = vj; 105 q[vj] = qm; 106 } /* if(vj > k) */ 107 } /* for (j=jmin; j<jmax; j++) */ 108 109 /* modify nonzero structure of k-th row by computing fill-in 110 for each row i to be merged in */ 111 prow = k; 112 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 113 114 while (prow < k){ 115 /* merge row prow into k-th row */ 116 jmin = iu[prow] + 1; jmax = iu[prow+1]; 117 qm = k; 118 for (j=jmin; j<jmax; j++){ 119 vj = ju[j]; 120 do { 121 m = qm; qm = q[m]; 122 } while (qm < vj); 123 if (qm != vj){ 124 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 125 } 126 } 127 prow = jl[prow]; /* next pivot row */ 128 } 129 130 /* add k to row list for first nonzero element in k-th row */ 131 if (nzk > 0){ 132 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 133 jl[k] = jl[i]; jl[i] = k; 134 } 135 iu[k+1] = iu[k] + nzk; 136 137 /* allocate more space to ju if needed */ 138 if (iu[k+1] > umax) { 139 /* estimate how much additional space we will need */ 140 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 141 /* just double the memory each time */ 142 maxadd = umax; 143 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 144 umax += maxadd; 145 146 /* allocate a longer ju */ 147 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 148 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 149 ierr = PetscFree(ju);CHKERRQ(ierr); 150 ju = jutmp; 151 realloc++; /* count how many times we realloc */ 152 } 153 154 /* save nonzero structure of k-th row in ju */ 155 i=k; 156 while (nzk --) { 157 i = q[i]; 158 ju[juidx++] = i; 159 } 160 } 161 162 if (ai[mbs] != 0) { 163 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 164 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 165 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 166 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 167 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 168 } else { 169 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 170 } 171 172 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 173 /* ierr = PetscFree(q);CHKERRQ(ierr); */ 174 ierr = PetscFree(jl);CHKERRQ(ierr); 175 176 /* put together the new matrix */ 177 ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr); 178 /* PetscLogObjectParent(*B,iperm); */ 179 b = (Mat_SeqSBAIJ*)(*B)->data; 180 ierr = PetscFree(b->imax);CHKERRQ(ierr); 181 b->singlemalloc = PETSC_FALSE; 182 /* the next line frees the default space generated by the Create() */ 183 ierr = PetscFree(b->a);CHKERRQ(ierr); 184 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 185 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 186 b->j = ju; 187 b->i = iu; 188 b->diag = 0; 189 b->ilen = 0; 190 b->imax = 0; 191 b->row = perm; 192 b->pivotinblocks = PETSC_FALSE; /* need to get from MatCholeskyInfo */ 193 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 194 b->icol = perm; 195 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 196 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 197 /* In b structure: Free imax, ilen, old a, old j. 198 Allocate idnew, solve_work, new a, new j */ 199 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 200 b->s_maxnz = b->s_nz = iu[mbs]; 201 202 (*B)->factor = FACTOR_CHOLESKY; 203 (*B)->info.factor_mallocs = realloc; 204 (*B)->info.fill_ratio_given = f; 205 if (ai[mbs] != 0) { 206 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 207 } else { 208 (*B)->info.fill_ratio_needed = 0.0; 209 } 210 211 if (perm_identity){ 212 switch (bs) { 213 case 1: 214 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 215 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 216 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 217 break; 218 case 2: 219 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 220 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 221 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 222 break; 223 case 3: 224 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 225 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 226 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 227 break; 228 case 4: 229 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 230 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 231 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 232 break; 233 case 5: 234 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 235 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 236 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 237 break; 238 case 6: 239 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 240 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 241 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 242 break; 243 case 7: 244 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 245 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 246 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 247 break; 248 default: 249 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 250 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 251 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 252 break; 253 } 254 } 255 256 PetscFunctionReturn(0); 257 } 258 259 260 #undef __FUNCT__ 261 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 262 int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B) 263 { 264 Mat C = *B; 265 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 266 IS perm = b->row; 267 int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 268 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 269 int bs=a->bs,bs2 = a->bs2; 270 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 271 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 272 MatScalar *work; 273 int *pivots; 274 275 PetscFunctionBegin; 276 277 /* initialization */ 278 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 279 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 280 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 281 jl = il + mbs; 282 for (i=0; i<mbs; i++) { 283 jl[i] = mbs; il[0] = 0; 284 } 285 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 286 uik = dk + bs2; 287 work = uik + bs2; 288 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 289 290 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 291 292 /* check permutation */ 293 if (!a->permute){ 294 ai = a->i; aj = a->j; aa = a->a; 295 } else { 296 ai = a->inew; aj = a->jnew; 297 ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 298 ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 299 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 300 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 301 302 for (i=0; i<mbs; i++){ 303 jmin = ai[i]; jmax = ai[i+1]; 304 for (j=jmin; j<jmax; j++){ 305 while (a2anew[j] != j){ 306 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 307 for (k1=0; k1<bs2; k1++){ 308 dk[k1] = aa[k*bs2+k1]; 309 aa[k*bs2+k1] = aa[j*bs2+k1]; 310 aa[j*bs2+k1] = dk[k1]; 311 } 312 } 313 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 314 if (i > aj[j]){ 315 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 316 ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 317 for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 318 for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 319 for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 320 } 321 } 322 } 323 } 324 ierr = PetscFree(a2anew);CHKERRQ(ierr); 325 } 326 327 /* for each row k */ 328 for (k = 0; k<mbs; k++){ 329 330 /*initialize k-th row with elements nonzero in row perm(k) of A */ 331 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 332 333 ap = aa + jmin*bs2; 334 for (j = jmin; j < jmax; j++){ 335 vj = perm_ptr[aj[j]]; /* block col. index */ 336 rtmp_ptr = rtmp + vj*bs2; 337 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 338 } 339 340 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 341 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 342 i = jl[k]; /* first row to be added to k_th row */ 343 344 while (i < k){ 345 nexti = jl[i]; /* next row to be added to k_th row */ 346 347 /* compute multiplier */ 348 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 349 350 /* uik = -inv(Di)*U_bar(i,k) */ 351 diag = ba + i*bs2; 352 u = ba + ili*bs2; 353 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 354 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 355 356 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 357 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 358 359 /* update -U(i,k) */ 360 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 361 362 /* add multiple of row i to k-th row ... */ 363 jmin = ili + 1; jmax = bi[i+1]; 364 if (jmin < jmax){ 365 for (j=jmin; j<jmax; j++) { 366 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 367 rtmp_ptr = rtmp + bj[j]*bs2; 368 u = ba + j*bs2; 369 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 370 } 371 372 /* ... add i to row list for next nonzero entry */ 373 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 374 j = bj[jmin]; 375 jl[i] = jl[j]; jl[j] = i; /* update jl */ 376 } 377 i = nexti; 378 } 379 380 /* save nonzero entries in k-th row of U ... */ 381 382 /* invert diagonal block */ 383 diag = ba+k*bs2; 384 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 385 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 386 387 jmin = bi[k]; jmax = bi[k+1]; 388 if (jmin < jmax) { 389 for (j=jmin; j<jmax; j++){ 390 vj = bj[j]; /* block col. index of U */ 391 u = ba + j*bs2; 392 rtmp_ptr = rtmp + vj*bs2; 393 for (k1=0; k1<bs2; k1++){ 394 *u++ = *rtmp_ptr; 395 *rtmp_ptr++ = 0.0; 396 } 397 } 398 399 /* ... add k to row list for first nonzero entry in k-th row */ 400 il[k] = jmin; 401 i = bj[jmin]; 402 jl[k] = jl[i]; jl[i] = k; 403 } 404 } 405 406 ierr = PetscFree(rtmp);CHKERRQ(ierr); 407 ierr = PetscFree(il);CHKERRQ(ierr); 408 ierr = PetscFree(dk);CHKERRQ(ierr); 409 ierr = PetscFree(pivots);CHKERRQ(ierr); 410 if (a->permute){ 411 ierr = PetscFree(aa);CHKERRQ(ierr); 412 } 413 414 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 415 C->factor = FACTOR_CHOLESKY; 416 C->assembled = PETSC_TRUE; 417 C->preallocated = PETSC_TRUE; 418 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 419 PetscFunctionReturn(0); 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 424 int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B) 425 { 426 Mat C = *B; 427 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 428 int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 429 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 430 int bs=a->bs,bs2 = a->bs2; 431 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 432 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 433 MatScalar *work; 434 int *pivots; 435 436 PetscFunctionBegin; 437 438 /* initialization */ 439 440 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 441 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 442 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 443 jl = il + mbs; 444 for (i=0; i<mbs; i++) { 445 jl[i] = mbs; il[0] = 0; 446 } 447 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 448 uik = dk + bs2; 449 work = uik + bs2; 450 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 451 452 ai = a->i; aj = a->j; aa = a->a; 453 454 /* for each row k */ 455 for (k = 0; k<mbs; k++){ 456 457 /*initialize k-th row with elements nonzero in row k of A */ 458 jmin = ai[k]; jmax = ai[k+1]; 459 ap = aa + jmin*bs2; 460 for (j = jmin; j < jmax; j++){ 461 vj = aj[j]; /* block col. index */ 462 rtmp_ptr = rtmp + vj*bs2; 463 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 464 } 465 466 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 467 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 468 i = jl[k]; /* first row to be added to k_th row */ 469 470 while (i < k){ 471 nexti = jl[i]; /* next row to be added to k_th row */ 472 473 /* compute multiplier */ 474 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 475 476 /* uik = -inv(Di)*U_bar(i,k) */ 477 diag = ba + i*bs2; 478 u = ba + ili*bs2; 479 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 480 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 481 482 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 483 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 484 485 /* update -U(i,k) */ 486 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 487 488 /* add multiple of row i to k-th row ... */ 489 jmin = ili + 1; jmax = bi[i+1]; 490 if (jmin < jmax){ 491 for (j=jmin; j<jmax; j++) { 492 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 493 rtmp_ptr = rtmp + bj[j]*bs2; 494 u = ba + j*bs2; 495 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 496 } 497 498 /* ... add i to row list for next nonzero entry */ 499 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 500 j = bj[jmin]; 501 jl[i] = jl[j]; jl[j] = i; /* update jl */ 502 } 503 i = nexti; 504 } 505 506 /* save nonzero entries in k-th row of U ... */ 507 508 /* invert diagonal block */ 509 diag = ba+k*bs2; 510 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 511 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 512 513 jmin = bi[k]; jmax = bi[k+1]; 514 if (jmin < jmax) { 515 for (j=jmin; j<jmax; j++){ 516 vj = bj[j]; /* block col. index of U */ 517 u = ba + j*bs2; 518 rtmp_ptr = rtmp + vj*bs2; 519 for (k1=0; k1<bs2; k1++){ 520 *u++ = *rtmp_ptr; 521 *rtmp_ptr++ = 0.0; 522 } 523 } 524 525 /* ... add k to row list for first nonzero entry in k-th row */ 526 il[k] = jmin; 527 i = bj[jmin]; 528 jl[k] = jl[i]; jl[i] = k; 529 } 530 } 531 532 ierr = PetscFree(rtmp);CHKERRQ(ierr); 533 ierr = PetscFree(il);CHKERRQ(ierr); 534 ierr = PetscFree(dk);CHKERRQ(ierr); 535 ierr = PetscFree(pivots);CHKERRQ(ierr); 536 537 C->factor = FACTOR_CHOLESKY; 538 C->assembled = PETSC_TRUE; 539 C->preallocated = PETSC_TRUE; 540 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 541 PetscFunctionReturn(0); 542 } 543 544 /* 545 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 546 Version for blocks 2 by 2. 547 */ 548 #undef __FUNCT__ 549 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 550 int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B) 551 { 552 Mat C = *B; 553 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 554 IS perm = b->row; 555 int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 556 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 557 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 558 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 559 560 PetscFunctionBegin; 561 562 /* initialization */ 563 /* il and jl record the first nonzero element in each row of the accessing 564 window U(0:k, k:mbs-1). 565 jl: list of rows to be added to uneliminated rows 566 i>= k: jl(i) is the first row to be added to row i 567 i< k: jl(i) is the row following row i in some list of rows 568 jl(i) = mbs indicates the end of a list 569 il(i): points to the first nonzero element in columns k,...,mbs-1 of 570 row i of U */ 571 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 572 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 573 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 574 jl = il + mbs; 575 for (i=0; i<mbs; i++) { 576 jl[i] = mbs; il[0] = 0; 577 } 578 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 579 uik = dk + 4; 580 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 581 582 /* check permutation */ 583 if (!a->permute){ 584 ai = a->i; aj = a->j; aa = a->a; 585 } else { 586 ai = a->inew; aj = a->jnew; 587 ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 588 ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 589 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 590 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 591 592 for (i=0; i<mbs; i++){ 593 jmin = ai[i]; jmax = ai[i+1]; 594 for (j=jmin; j<jmax; j++){ 595 while (a2anew[j] != j){ 596 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 597 for (k1=0; k1<4; k1++){ 598 dk[k1] = aa[k*4+k1]; 599 aa[k*4+k1] = aa[j*4+k1]; 600 aa[j*4+k1] = dk[k1]; 601 } 602 } 603 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 604 if (i > aj[j]){ 605 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 606 ap = aa + j*4; /* ptr to the beginning of the block */ 607 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 608 ap[1] = ap[2]; 609 ap[2] = dk[1]; 610 } 611 } 612 } 613 ierr = PetscFree(a2anew);CHKERRQ(ierr); 614 } 615 616 /* for each row k */ 617 for (k = 0; k<mbs; k++){ 618 619 /*initialize k-th row with elements nonzero in row perm(k) of A */ 620 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 621 ap = aa + jmin*4; 622 for (j = jmin; j < jmax; j++){ 623 vj = perm_ptr[aj[j]]; /* block col. index */ 624 rtmp_ptr = rtmp + vj*4; 625 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 626 } 627 628 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 629 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 630 i = jl[k]; /* first row to be added to k_th row */ 631 632 while (i < k){ 633 nexti = jl[i]; /* next row to be added to k_th row */ 634 635 /* compute multiplier */ 636 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 637 638 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 639 diag = ba + i*4; 640 u = ba + ili*4; 641 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 642 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 643 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 644 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 645 646 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 647 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 648 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 649 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 650 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 651 652 /* update -U(i,k): ba[ili] = uik */ 653 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 654 655 /* add multiple of row i to k-th row ... */ 656 jmin = ili + 1; jmax = bi[i+1]; 657 if (jmin < jmax){ 658 for (j=jmin; j<jmax; j++) { 659 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 660 rtmp_ptr = rtmp + bj[j]*4; 661 u = ba + j*4; 662 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 663 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 664 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 665 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 666 } 667 668 /* ... add i to row list for next nonzero entry */ 669 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 670 j = bj[jmin]; 671 jl[i] = jl[j]; jl[j] = i; /* update jl */ 672 } 673 i = nexti; 674 } 675 676 /* save nonzero entries in k-th row of U ... */ 677 678 /* invert diagonal block */ 679 diag = ba+k*4; 680 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 681 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 682 683 jmin = bi[k]; jmax = bi[k+1]; 684 if (jmin < jmax) { 685 for (j=jmin; j<jmax; j++){ 686 vj = bj[j]; /* block col. index of U */ 687 u = ba + j*4; 688 rtmp_ptr = rtmp + vj*4; 689 for (k1=0; k1<4; k1++){ 690 *u++ = *rtmp_ptr; 691 *rtmp_ptr++ = 0.0; 692 } 693 } 694 695 /* ... add k to row list for first nonzero entry in k-th row */ 696 il[k] = jmin; 697 i = bj[jmin]; 698 jl[k] = jl[i]; jl[i] = k; 699 } 700 } 701 702 ierr = PetscFree(rtmp);CHKERRQ(ierr); 703 ierr = PetscFree(il);CHKERRQ(ierr); 704 ierr = PetscFree(dk);CHKERRQ(ierr); 705 if (a->permute) { 706 ierr = PetscFree(aa);CHKERRQ(ierr); 707 } 708 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 709 C->factor = FACTOR_CHOLESKY; 710 C->assembled = PETSC_TRUE; 711 C->preallocated = PETSC_TRUE; 712 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 713 PetscFunctionReturn(0); 714 } 715 716 /* 717 Version for when blocks are 2 by 2 Using natural ordering 718 */ 719 #undef __FUNCT__ 720 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 721 int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B) 722 { 723 Mat C = *B; 724 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 725 int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 726 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 727 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 728 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 729 730 PetscFunctionBegin; 731 732 /* initialization */ 733 /* il and jl record the first nonzero element in each row of the accessing 734 window U(0:k, k:mbs-1). 735 jl: list of rows to be added to uneliminated rows 736 i>= k: jl(i) is the first row to be added to row i 737 i< k: jl(i) is the row following row i in some list of rows 738 jl(i) = mbs indicates the end of a list 739 il(i): points to the first nonzero element in columns k,...,mbs-1 of 740 row i of U */ 741 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 742 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 743 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 744 jl = il + mbs; 745 for (i=0; i<mbs; i++) { 746 jl[i] = mbs; il[0] = 0; 747 } 748 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 749 uik = dk + 4; 750 751 ai = a->i; aj = a->j; aa = a->a; 752 753 /* for each row k */ 754 for (k = 0; k<mbs; k++){ 755 756 /*initialize k-th row with elements nonzero in row k of A */ 757 jmin = ai[k]; jmax = ai[k+1]; 758 ap = aa + jmin*4; 759 for (j = jmin; j < jmax; j++){ 760 vj = aj[j]; /* block col. index */ 761 rtmp_ptr = rtmp + vj*4; 762 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 763 } 764 765 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 766 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 767 i = jl[k]; /* first row to be added to k_th row */ 768 769 while (i < k){ 770 nexti = jl[i]; /* next row to be added to k_th row */ 771 772 /* compute multiplier */ 773 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 774 775 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 776 diag = ba + i*4; 777 u = ba + ili*4; 778 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 779 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 780 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 781 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 782 783 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 784 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 785 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 786 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 787 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 788 789 /* update -U(i,k): ba[ili] = uik */ 790 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 791 792 /* add multiple of row i to k-th row ... */ 793 jmin = ili + 1; jmax = bi[i+1]; 794 if (jmin < jmax){ 795 for (j=jmin; j<jmax; j++) { 796 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 797 rtmp_ptr = rtmp + bj[j]*4; 798 u = ba + j*4; 799 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 800 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 801 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 802 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 803 } 804 805 /* ... add i to row list for next nonzero entry */ 806 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 807 j = bj[jmin]; 808 jl[i] = jl[j]; jl[j] = i; /* update jl */ 809 } 810 i = nexti; 811 } 812 813 /* save nonzero entries in k-th row of U ... */ 814 815 /* invert diagonal block */ 816 diag = ba+k*4; 817 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 818 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 819 820 jmin = bi[k]; jmax = bi[k+1]; 821 if (jmin < jmax) { 822 for (j=jmin; j<jmax; j++){ 823 vj = bj[j]; /* block col. index of U */ 824 u = ba + j*4; 825 rtmp_ptr = rtmp + vj*4; 826 for (k1=0; k1<4; k1++){ 827 *u++ = *rtmp_ptr; 828 *rtmp_ptr++ = 0.0; 829 } 830 } 831 832 /* ... add k to row list for first nonzero entry in k-th row */ 833 il[k] = jmin; 834 i = bj[jmin]; 835 jl[k] = jl[i]; jl[i] = k; 836 } 837 } 838 839 ierr = PetscFree(rtmp);CHKERRQ(ierr); 840 ierr = PetscFree(il);CHKERRQ(ierr); 841 ierr = PetscFree(dk);CHKERRQ(ierr); 842 843 C->factor = FACTOR_CHOLESKY; 844 C->assembled = PETSC_TRUE; 845 C->preallocated = PETSC_TRUE; 846 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 847 PetscFunctionReturn(0); 848 } 849 850 /* 851 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 852 Version for blocks are 1 by 1. 853 */ 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1" 856 int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B) 857 { 858 Mat C = *B; 859 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 860 IS ip = b->row; 861 int *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 862 int *ai,*aj,*r; 863 int k,jmin,jmax,*jl,*il,vj,nexti,ili; 864 MatScalar *rtmp; 865 MatScalar *ba = b->a,*aa,ak; 866 MatScalar dk,uikdi; 867 868 PetscFunctionBegin; 869 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 870 if (!a->permute){ 871 ai = a->i; aj = a->j; aa = a->a; 872 } else { 873 ai = a->inew; aj = a->jnew; 874 ierr = PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 875 ierr = PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 876 ierr = PetscMalloc(ai[mbs]*sizeof(int),&r);CHKERRQ(ierr); 877 ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 878 879 jmin = ai[0]; jmax = ai[mbs]; 880 for (j=jmin; j<jmax; j++){ 881 while (r[j] != j){ 882 k = r[j]; r[j] = r[k]; r[k] = k; 883 ak = aa[k]; aa[k] = aa[j]; aa[j] = ak; 884 } 885 } 886 ierr = PetscFree(r);CHKERRQ(ierr); 887 } 888 889 /* initialization */ 890 /* il and jl record the first nonzero element in each row of the accessing 891 window U(0:k, k:mbs-1). 892 jl: list of rows to be added to uneliminated rows 893 i>= k: jl(i) is the first row to be added to row i 894 i< k: jl(i) is the row following row i in some list of rows 895 jl(i) = mbs indicates the end of a list 896 il(i): points to the first nonzero element in columns k,...,mbs-1 of 897 row i of U */ 898 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 899 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 900 jl = il + mbs; 901 for (i=0; i<mbs; i++) { 902 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 903 } 904 905 /* for each row k */ 906 for (k = 0; k<mbs; k++){ 907 908 /*initialize k-th row with elements nonzero in row perm(k) of A */ 909 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 910 911 for (j = jmin; j < jmax; j++){ 912 vj = rip[aj[j]]; 913 rtmp[vj] = aa[j]; 914 } 915 916 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 917 dk = rtmp[k]; 918 i = jl[k]; /* first row to be added to k_th row */ 919 920 while (i < k){ 921 nexti = jl[i]; /* next row to be added to k_th row */ 922 923 /* compute multiplier, update D(k) and U(i,k) */ 924 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 925 uikdi = - ba[ili]*ba[i]; 926 dk += uikdi*ba[ili]; 927 ba[ili] = uikdi; /* -U(i,k) */ 928 929 /* add multiple of row i to k-th row ... */ 930 jmin = ili + 1; jmax = bi[i+1]; 931 if (jmin < jmax){ 932 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 933 /* ... add i to row list for next nonzero entry */ 934 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 935 j = bj[jmin]; 936 jl[i] = jl[j]; jl[j] = i; /* update jl */ 937 } 938 i = nexti; 939 } 940 941 /* check for zero pivot and save diagoanl element */ 942 if (dk == 0.0){ 943 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 944 /* 945 } else if (PetscRealPart(dk) < 0.0){ 946 SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk); 947 */ 948 } 949 950 /* save nonzero entries in k-th row of U ... */ 951 ba[k] = 1.0/dk; 952 jmin = bi[k]; jmax = bi[k+1]; 953 if (jmin < jmax) { 954 for (j=jmin; j<jmax; j++){ 955 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 956 } 957 /* ... add k to row list for first nonzero entry in k-th row */ 958 il[k] = jmin; 959 i = bj[jmin]; 960 jl[k] = jl[i]; jl[i] = k; 961 } 962 } 963 964 ierr = PetscFree(rtmp);CHKERRQ(ierr); 965 ierr = PetscFree(il);CHKERRQ(ierr); 966 if (a->permute){ 967 ierr = PetscFree(aa);CHKERRQ(ierr); 968 } 969 970 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 971 C->factor = FACTOR_CHOLESKY; 972 C->assembled = PETSC_TRUE; 973 C->preallocated = PETSC_TRUE; 974 PetscLogFlops(b->mbs); 975 PetscFunctionReturn(0); 976 } 977 978 /* 979 Version for when blocks are 1 by 1 Using natural ordering 980 */ 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 983 int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B) 984 { 985 Mat C = *B; 986 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 987 int ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 988 int *ai,*aj; 989 int k,jmin,jmax,*jl,*il,vj,nexti,ili; 990 MatScalar *rtmp,*ba = b->a,*aa,dk,uikdi; 991 992 PetscFunctionBegin; 993 994 /* initialization */ 995 /* il and jl record the first nonzero element in each row of the accessing 996 window U(0:k, k:mbs-1). 997 jl: list of rows to be added to uneliminated rows 998 i>= k: jl(i) is the first row to be added to row i 999 i< k: jl(i) is the row following row i in some list of rows 1000 jl(i) = mbs indicates the end of a list 1001 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1002 row i of U */ 1003 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1004 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 1005 jl = il + mbs; 1006 for (i=0; i<mbs; i++) { 1007 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1008 } 1009 1010 ai = a->i; aj = a->j; aa = a->a; 1011 1012 /* for each row k */ 1013 for (k = 0; k<mbs; k++){ 1014 1015 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1016 jmin = ai[k]; jmax = ai[k+1]; 1017 1018 for (j = jmin; j < jmax; j++){ 1019 vj = aj[j]; 1020 rtmp[vj] = aa[j]; 1021 } 1022 1023 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1024 dk = rtmp[k]; 1025 i = jl[k]; /* first row to be added to k_th row */ 1026 1027 while (i < k){ 1028 nexti = jl[i]; /* next row to be added to k_th row */ 1029 1030 /* compute multiplier, update D(k) and U(i,k) */ 1031 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1032 uikdi = - ba[ili]*ba[i]; 1033 dk += uikdi*ba[ili]; 1034 ba[ili] = uikdi; /* -U(i,k) */ 1035 1036 /* add multiple of row i to k-th row ... */ 1037 jmin = ili + 1; jmax = bi[i+1]; 1038 if (jmin < jmax){ 1039 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1040 /* ... add i to row list for next nonzero entry */ 1041 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1042 j = bj[jmin]; 1043 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1044 } 1045 i = nexti; 1046 } 1047 1048 /* check for zero pivot and save diagoanl element */ 1049 if (dk == 0.0){ 1050 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 1051 /* 1052 } else if (PetscRealPart(dk) < 0){ 1053 SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk); 1054 */ 1055 } 1056 1057 /* save nonzero entries in k-th row of U ... */ 1058 ba[k] = 1.0/dk; 1059 jmin = bi[k]; jmax = bi[k+1]; 1060 if (jmin < jmax) { 1061 for (j=jmin; j<jmax; j++){ 1062 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 1063 } 1064 /* ... add k to row list for first nonzero entry in k-th row */ 1065 il[k] = jmin; 1066 i = bj[jmin]; 1067 jl[k] = jl[i]; jl[i] = k; 1068 } 1069 } 1070 1071 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1072 ierr = PetscFree(il);CHKERRQ(ierr); 1073 1074 C->factor = FACTOR_CHOLESKY; 1075 C->assembled = PETSC_TRUE; 1076 C->preallocated = PETSC_TRUE; 1077 PetscLogFlops(b->mbs); 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ" 1083 int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,PetscReal f) 1084 { 1085 int ierr; 1086 Mat C; 1087 1088 PetscFunctionBegin; 1089 ierr = MatCholeskyFactorSymbolic(A,perm,f,&C);CHKERRQ(ierr); 1090 ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr); 1091 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1092 PetscFunctionReturn(0); 1093 } 1094 1095 1096