1 /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/seq/baij.h" 4 #include "src/mat/impls/sbaij/seq/sbaij.h" 5 #include "src/vec/vecimpl.h" 6 #include "src/inline/ilu.h" 7 #include "include/petscis.h" 8 9 #if !defined(PETSC_USE_COMPLEX) 10 /* 11 input: 12 F -- numeric factor 13 output: 14 nneg, nzero, npos: matrix inertia 15 */ 16 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatGetInertia_SeqSBAIJ" 19 int MatGetInertia_SeqSBAIJ(Mat F,int *nneig,int *nzero,int *npos) 20 { 21 Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data; 22 PetscScalar *dd = fact_ptr->a; 23 int m = F->m,i; 24 25 PetscFunctionBegin; 26 if (nneig){ 27 *nneig = 0; 28 for (i=0; i<m; i++){ 29 if (PetscRealPart(dd[i]) < 0.0) (*nneig)++; 30 } 31 } 32 if (nzero){ 33 *nzero = 0; 34 for (i=0; i<m; i++){ 35 if (PetscRealPart(dd[i]) == 0.0) (*nzero)++; 36 } 37 } 38 if (npos){ 39 *npos = 0; 40 for (i=0; i<m; i++){ 41 if (PetscRealPart(dd[i]) > 0.0) (*npos)++; 42 } 43 } 44 PetscFunctionReturn(0); 45 } 46 #endif /* !defined(PETSC_USE_COMPLEX) */ 47 48 /* Using Modified Sparse Row (MSR) storage. 49 See page 85, "Iterative Methods ..." by Saad. */ 50 /* 51 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 52 */ 53 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 56 int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,PetscReal f,Mat *B) 57 { 58 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 59 int *rip,ierr,i,mbs = a->mbs,*ai,*aj; 60 int *jutmp,bs = a->bs,bs2=a->bs2; 61 int m,realloc = 0,prow; 62 int *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 63 int *il,ili,nextprow; 64 PetscTruth perm_identity; 65 66 PetscFunctionBegin; 67 /* check whether perm is the identity mapping */ 68 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 69 70 /* -- inplace factorization, i.e., use sbaij for *B -- */ 71 if (perm_identity && bs==1 ){ 72 if (!perm_identity) a->permute = PETSC_TRUE; 73 74 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 75 76 if (perm_identity){ /* without permutation */ 77 ai = a->i; aj = a->j; 78 } else { /* non-trivial permutation */ 79 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 80 ai = a->inew; aj = a->jnew; 81 } 82 83 /* initialization */ 84 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 85 umax = (int)(f*ai[mbs] + 1); 86 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 87 iu[0] = 0; 88 juidx = 0; /* index for ju */ 89 ierr = PetscMalloc((3*mbs+1)*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */ 90 q = jl + mbs; /* linked list for col index of active row */ 91 il = q + mbs; 92 for (i=0; i<mbs; i++){ 93 jl[i] = mbs; 94 q[i] = 0; 95 il[i] = 0; 96 } 97 98 /* for each row k */ 99 for (k=0; k<mbs; k++){ 100 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 101 q[k] = mbs; 102 /* initialize nonzero structure of k-th row to row rip[k] of A */ 103 jmin = ai[rip[k]] +1; /* exclude diag[k] */ 104 jmax = ai[rip[k]+1]; 105 for (j=jmin; j<jmax; j++){ 106 vj = rip[aj[j]]; /* col. value */ 107 if(vj > k){ 108 qm = k; 109 do { 110 m = qm; qm = q[m]; 111 } while(qm < vj); 112 if (qm == vj) { 113 SETERRQ(1," error: duplicate entry in A\n"); 114 } 115 nzk++; 116 q[m] = vj; 117 q[vj] = qm; 118 } /* if(vj > k) */ 119 } /* for (j=jmin; j<jmax; j++) */ 120 121 /* modify nonzero structure of k-th row by computing fill-in 122 for each row i to be merged in */ 123 prow = k; 124 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 125 126 while (prow < k){ 127 nextprow = jl[prow]; 128 129 /* merge row prow into k-th row */ 130 ili = il[prow]; 131 jmin = ili + 1; /* points to 2nd nzero entry in U(prow,k:mbs-1) */ 132 jmax = iu[prow+1]; 133 qm = k; 134 for (j=jmin; j<jmax; j++){ 135 vj = ju[j]; 136 do { 137 m = qm; qm = q[m]; 138 } while (qm < vj); 139 if (qm != vj){ /* a fill */ 140 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 141 } 142 } /* end of for (j=jmin; j<jmax; j++) */ 143 if (jmin < jmax){ 144 il[prow] = jmin; 145 j = ju[jmin]; 146 jl[prow] = jl[j]; jl[j] = prow; /* update jl */ 147 } 148 prow = nextprow; 149 } 150 151 /* update il and jl */ 152 if (nzk > 0){ 153 i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 154 jl[k] = jl[i]; jl[i] = k; 155 il[k] = iu[k] + 1; 156 } 157 iu[k+1] = iu[k] + nzk + 1; /* include diag[k] */ 158 159 /* allocate more space to ju if needed */ 160 if (iu[k+1] > umax) { 161 /* estimate how much additional space we will need */ 162 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 163 /* just double the memory each time */ 164 maxadd = umax; 165 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 166 umax += maxadd; 167 168 /* allocate a longer ju */ 169 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 170 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 171 ierr = PetscFree(ju);CHKERRQ(ierr); 172 ju = jutmp; 173 realloc++; /* count how many times we realloc */ 174 } 175 176 /* save nonzero structure of k-th row in ju */ 177 ju[juidx++] = k; /* diag[k] */ 178 i = k; 179 while (nzk --) { 180 i = q[i]; 181 ju[juidx++] = i; 182 } 183 } 184 185 if (ai[mbs] != 0) { 186 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 187 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 188 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 189 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 190 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 191 } else { 192 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 193 } 194 195 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 196 /* ierr = PetscFree(q);CHKERRQ(ierr); */ 197 ierr = PetscFree(jl);CHKERRQ(ierr); 198 199 /* put together the new matrix */ 200 ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr); 201 /* PetscLogObjectParent(*B,iperm); */ 202 b = (Mat_SeqSBAIJ*)(*B)->data; 203 ierr = PetscFree(b->imax);CHKERRQ(ierr); 204 b->singlemalloc = PETSC_FALSE; 205 /* the next line frees the default space generated by the Create() */ 206 ierr = PetscFree(b->a);CHKERRQ(ierr); 207 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 208 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 209 b->j = ju; 210 b->i = iu; 211 b->diag = 0; 212 b->ilen = 0; 213 b->imax = 0; 214 b->row = perm; 215 b->pivotinblocks = PETSC_FALSE; /* need to get from MatCholeskyInfo */ 216 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 217 b->icol = perm; 218 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 219 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 220 /* In b structure: Free imax, ilen, old a, old j. 221 Allocate idnew, solve_work, new a, new j */ 222 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 223 b->s_maxnz = b->s_nz = iu[mbs]; 224 225 (*B)->factor = FACTOR_CHOLESKY; 226 (*B)->info.factor_mallocs = realloc; 227 (*B)->info.fill_ratio_given = f; 228 if (ai[mbs] != 0) { 229 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 230 } else { 231 (*B)->info.fill_ratio_needed = 0.0; 232 } 233 234 235 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace; 236 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 237 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 238 239 PetscFunctionReturn(0); 240 } 241 /* ----------- end of new code --------------------*/ 242 243 244 if (!perm_identity) a->permute = PETSC_TRUE; 245 246 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 247 248 if (perm_identity){ /* without permutation */ 249 ai = a->i; aj = a->j; 250 } else { /* non-trivial permutation */ 251 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 252 ai = a->inew; aj = a->jnew; 253 } 254 255 /* initialization */ 256 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 257 umax = (int)(f*ai[mbs] + 1); umax += mbs + 1; 258 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 259 iu[0] = mbs+1; 260 juidx = mbs + 1; /* index for ju */ 261 ierr = PetscMalloc(2*mbs*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for pivot row */ 262 q = jl + mbs; /* linked list for col index */ 263 for (i=0; i<mbs; i++){ 264 jl[i] = mbs; 265 q[i] = 0; 266 } 267 268 /* for each row k */ 269 for (k=0; k<mbs; k++){ 270 for (i=0; i<mbs; i++) q[i] = 0; /* to be removed! */ 271 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 272 q[k] = mbs; 273 /* initialize nonzero structure of k-th row to row rip[k] of A */ 274 jmin = ai[rip[k]] +1; /* exclude diag[k] */ 275 jmax = ai[rip[k]+1]; 276 for (j=jmin; j<jmax; j++){ 277 vj = rip[aj[j]]; /* col. value */ 278 if(vj > k){ 279 qm = k; 280 do { 281 m = qm; qm = q[m]; 282 } while(qm < vj); 283 if (qm == vj) { 284 SETERRQ(1," error: duplicate entry in A\n"); 285 } 286 nzk++; 287 q[m] = vj; 288 q[vj] = qm; 289 } /* if(vj > k) */ 290 } /* for (j=jmin; j<jmax; j++) */ 291 292 /* modify nonzero structure of k-th row by computing fill-in 293 for each row i to be merged in */ 294 prow = k; 295 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 296 297 while (prow < k){ 298 /* merge row prow into k-th row */ 299 jmin = iu[prow] + 1; jmax = iu[prow+1]; 300 qm = k; 301 for (j=jmin; j<jmax; j++){ 302 vj = ju[j]; 303 do { 304 m = qm; qm = q[m]; 305 } while (qm < vj); 306 if (qm != vj){ 307 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 308 } 309 } 310 prow = jl[prow]; /* next pivot row */ 311 } 312 313 /* add k to row list for first nonzero element in k-th row */ 314 if (nzk > 0){ 315 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 316 jl[k] = jl[i]; jl[i] = k; 317 } 318 iu[k+1] = iu[k] + nzk; 319 320 /* allocate more space to ju if needed */ 321 if (iu[k+1] > umax) { 322 /* estimate how much additional space we will need */ 323 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 324 /* just double the memory each time */ 325 maxadd = umax; 326 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 327 umax += maxadd; 328 329 /* allocate a longer ju */ 330 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 331 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 332 ierr = PetscFree(ju);CHKERRQ(ierr); 333 ju = jutmp; 334 realloc++; /* count how many times we realloc */ 335 } 336 337 /* save nonzero structure of k-th row in ju */ 338 i=k; 339 while (nzk --) { 340 i = q[i]; 341 ju[juidx++] = i; 342 } 343 } 344 345 if (ai[mbs] != 0) { 346 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 347 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 348 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 349 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 350 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 351 } else { 352 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 353 } 354 355 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 356 /* ierr = PetscFree(q);CHKERRQ(ierr); */ 357 ierr = PetscFree(jl);CHKERRQ(ierr); 358 359 /* put together the new matrix */ 360 ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr); 361 /* PetscLogObjectParent(*B,iperm); */ 362 b = (Mat_SeqSBAIJ*)(*B)->data; 363 ierr = PetscFree(b->imax);CHKERRQ(ierr); 364 b->singlemalloc = PETSC_FALSE; 365 /* the next line frees the default space generated by the Create() */ 366 ierr = PetscFree(b->a);CHKERRQ(ierr); 367 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 368 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 369 b->j = ju; 370 b->i = iu; 371 b->diag = 0; 372 b->ilen = 0; 373 b->imax = 0; 374 b->row = perm; 375 b->pivotinblocks = PETSC_FALSE; /* need to get from MatCholeskyInfo */ 376 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 377 b->icol = perm; 378 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 379 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 380 /* In b structure: Free imax, ilen, old a, old j. 381 Allocate idnew, solve_work, new a, new j */ 382 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 383 b->s_maxnz = b->s_nz = iu[mbs]; 384 385 (*B)->factor = FACTOR_CHOLESKY; 386 (*B)->info.factor_mallocs = realloc; 387 (*B)->info.fill_ratio_given = f; 388 if (ai[mbs] != 0) { 389 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 390 } else { 391 (*B)->info.fill_ratio_needed = 0.0; 392 } 393 394 if (perm_identity){ 395 switch (bs) { 396 case 1: 397 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 398 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 399 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 400 break; 401 case 2: 402 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 403 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 404 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 405 break; 406 case 3: 407 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 408 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 409 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 410 break; 411 case 4: 412 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 413 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 414 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 415 break; 416 case 5: 417 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 418 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 419 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 420 break; 421 case 6: 422 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 423 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 424 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 425 break; 426 case 7: 427 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 428 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 429 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 430 break; 431 default: 432 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 433 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 434 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 435 break; 436 } 437 } 438 439 PetscFunctionReturn(0); 440 } 441 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 445 int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B) 446 { 447 Mat C = *B; 448 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 449 IS perm = b->row; 450 int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 451 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 452 int bs=a->bs,bs2 = a->bs2; 453 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 454 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 455 MatScalar *work; 456 int *pivots; 457 458 PetscFunctionBegin; 459 460 /* initialization */ 461 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 462 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 463 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 464 jl = il + mbs; 465 for (i=0; i<mbs; i++) { 466 jl[i] = mbs; il[0] = 0; 467 } 468 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 469 uik = dk + bs2; 470 work = uik + bs2; 471 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 472 473 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 474 475 /* check permutation */ 476 if (!a->permute){ 477 ai = a->i; aj = a->j; aa = a->a; 478 } else { 479 ai = a->inew; aj = a->jnew; 480 ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 481 ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 482 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 483 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 484 485 for (i=0; i<mbs; i++){ 486 jmin = ai[i]; jmax = ai[i+1]; 487 for (j=jmin; j<jmax; j++){ 488 while (a2anew[j] != j){ 489 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 490 for (k1=0; k1<bs2; k1++){ 491 dk[k1] = aa[k*bs2+k1]; 492 aa[k*bs2+k1] = aa[j*bs2+k1]; 493 aa[j*bs2+k1] = dk[k1]; 494 } 495 } 496 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 497 if (i > aj[j]){ 498 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 499 ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 500 for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 501 for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 502 for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 503 } 504 } 505 } 506 } 507 ierr = PetscFree(a2anew);CHKERRQ(ierr); 508 } 509 510 /* for each row k */ 511 for (k = 0; k<mbs; k++){ 512 513 /*initialize k-th row with elements nonzero in row perm(k) of A */ 514 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 515 516 ap = aa + jmin*bs2; 517 for (j = jmin; j < jmax; j++){ 518 vj = perm_ptr[aj[j]]; /* block col. index */ 519 rtmp_ptr = rtmp + vj*bs2; 520 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 521 } 522 523 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 524 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 525 i = jl[k]; /* first row to be added to k_th row */ 526 527 while (i < k){ 528 nexti = jl[i]; /* next row to be added to k_th row */ 529 530 /* compute multiplier */ 531 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 532 533 /* uik = -inv(Di)*U_bar(i,k) */ 534 diag = ba + i*bs2; 535 u = ba + ili*bs2; 536 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 537 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 538 539 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 540 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 541 542 /* update -U(i,k) */ 543 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 544 545 /* add multiple of row i to k-th row ... */ 546 jmin = ili + 1; jmax = bi[i+1]; 547 if (jmin < jmax){ 548 for (j=jmin; j<jmax; j++) { 549 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 550 rtmp_ptr = rtmp + bj[j]*bs2; 551 u = ba + j*bs2; 552 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 553 } 554 555 /* ... add i to row list for next nonzero entry */ 556 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 557 j = bj[jmin]; 558 jl[i] = jl[j]; jl[j] = i; /* update jl */ 559 } 560 i = nexti; 561 } 562 563 /* save nonzero entries in k-th row of U ... */ 564 565 /* invert diagonal block */ 566 diag = ba+k*bs2; 567 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 568 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 569 570 jmin = bi[k]; jmax = bi[k+1]; 571 if (jmin < jmax) { 572 for (j=jmin; j<jmax; j++){ 573 vj = bj[j]; /* block col. index of U */ 574 u = ba + j*bs2; 575 rtmp_ptr = rtmp + vj*bs2; 576 for (k1=0; k1<bs2; k1++){ 577 *u++ = *rtmp_ptr; 578 *rtmp_ptr++ = 0.0; 579 } 580 } 581 582 /* ... add k to row list for first nonzero entry in k-th row */ 583 il[k] = jmin; 584 i = bj[jmin]; 585 jl[k] = jl[i]; jl[i] = k; 586 } 587 } 588 589 ierr = PetscFree(rtmp);CHKERRQ(ierr); 590 ierr = PetscFree(il);CHKERRQ(ierr); 591 ierr = PetscFree(dk);CHKERRQ(ierr); 592 ierr = PetscFree(pivots);CHKERRQ(ierr); 593 if (a->permute){ 594 ierr = PetscFree(aa);CHKERRQ(ierr); 595 } 596 597 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 598 C->factor = FACTOR_CHOLESKY; 599 C->assembled = PETSC_TRUE; 600 C->preallocated = PETSC_TRUE; 601 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 602 PetscFunctionReturn(0); 603 } 604 605 #undef __FUNCT__ 606 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 607 int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B) 608 { 609 Mat C = *B; 610 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 611 int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 612 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 613 int bs=a->bs,bs2 = a->bs2; 614 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 615 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 616 MatScalar *work; 617 int *pivots; 618 619 PetscFunctionBegin; 620 621 /* initialization */ 622 623 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 624 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 625 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 626 jl = il + mbs; 627 for (i=0; i<mbs; i++) { 628 jl[i] = mbs; il[0] = 0; 629 } 630 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 631 uik = dk + bs2; 632 work = uik + bs2; 633 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 634 635 ai = a->i; aj = a->j; aa = a->a; 636 637 /* for each row k */ 638 for (k = 0; k<mbs; k++){ 639 640 /*initialize k-th row with elements nonzero in row k of A */ 641 jmin = ai[k]; jmax = ai[k+1]; 642 ap = aa + jmin*bs2; 643 for (j = jmin; j < jmax; j++){ 644 vj = aj[j]; /* block col. index */ 645 rtmp_ptr = rtmp + vj*bs2; 646 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 647 } 648 649 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 650 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 651 i = jl[k]; /* first row to be added to k_th row */ 652 653 while (i < k){ 654 nexti = jl[i]; /* next row to be added to k_th row */ 655 656 /* compute multiplier */ 657 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 658 659 /* uik = -inv(Di)*U_bar(i,k) */ 660 diag = ba + i*bs2; 661 u = ba + ili*bs2; 662 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 663 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 664 665 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 666 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 667 668 /* update -U(i,k) */ 669 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 670 671 /* add multiple of row i to k-th row ... */ 672 jmin = ili + 1; jmax = bi[i+1]; 673 if (jmin < jmax){ 674 for (j=jmin; j<jmax; j++) { 675 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 676 rtmp_ptr = rtmp + bj[j]*bs2; 677 u = ba + j*bs2; 678 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 679 } 680 681 /* ... add i to row list for next nonzero entry */ 682 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 683 j = bj[jmin]; 684 jl[i] = jl[j]; jl[j] = i; /* update jl */ 685 } 686 i = nexti; 687 } 688 689 /* save nonzero entries in k-th row of U ... */ 690 691 /* invert diagonal block */ 692 diag = ba+k*bs2; 693 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 694 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 695 696 jmin = bi[k]; jmax = bi[k+1]; 697 if (jmin < jmax) { 698 for (j=jmin; j<jmax; j++){ 699 vj = bj[j]; /* block col. index of U */ 700 u = ba + j*bs2; 701 rtmp_ptr = rtmp + vj*bs2; 702 for (k1=0; k1<bs2; k1++){ 703 *u++ = *rtmp_ptr; 704 *rtmp_ptr++ = 0.0; 705 } 706 } 707 708 /* ... add k to row list for first nonzero entry in k-th row */ 709 il[k] = jmin; 710 i = bj[jmin]; 711 jl[k] = jl[i]; jl[i] = k; 712 } 713 } 714 715 ierr = PetscFree(rtmp);CHKERRQ(ierr); 716 ierr = PetscFree(il);CHKERRQ(ierr); 717 ierr = PetscFree(dk);CHKERRQ(ierr); 718 ierr = PetscFree(pivots);CHKERRQ(ierr); 719 720 C->factor = FACTOR_CHOLESKY; 721 C->assembled = PETSC_TRUE; 722 C->preallocated = PETSC_TRUE; 723 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 724 PetscFunctionReturn(0); 725 } 726 727 /* 728 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 729 Version for blocks 2 by 2. 730 */ 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 733 int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B) 734 { 735 Mat C = *B; 736 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 737 IS perm = b->row; 738 int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 739 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 740 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 741 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 742 743 PetscFunctionBegin; 744 745 /* initialization */ 746 /* il and jl record the first nonzero element in each row of the accessing 747 window U(0:k, k:mbs-1). 748 jl: list of rows to be added to uneliminated rows 749 i>= k: jl(i) is the first row to be added to row i 750 i< k: jl(i) is the row following row i in some list of rows 751 jl(i) = mbs indicates the end of a list 752 il(i): points to the first nonzero element in columns k,...,mbs-1 of 753 row i of U */ 754 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 755 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 756 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 757 jl = il + mbs; 758 for (i=0; i<mbs; i++) { 759 jl[i] = mbs; il[0] = 0; 760 } 761 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 762 uik = dk + 4; 763 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 764 765 /* check permutation */ 766 if (!a->permute){ 767 ai = a->i; aj = a->j; aa = a->a; 768 } else { 769 ai = a->inew; aj = a->jnew; 770 ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 771 ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 772 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 773 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 774 775 for (i=0; i<mbs; i++){ 776 jmin = ai[i]; jmax = ai[i+1]; 777 for (j=jmin; j<jmax; j++){ 778 while (a2anew[j] != j){ 779 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 780 for (k1=0; k1<4; k1++){ 781 dk[k1] = aa[k*4+k1]; 782 aa[k*4+k1] = aa[j*4+k1]; 783 aa[j*4+k1] = dk[k1]; 784 } 785 } 786 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 787 if (i > aj[j]){ 788 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 789 ap = aa + j*4; /* ptr to the beginning of the block */ 790 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 791 ap[1] = ap[2]; 792 ap[2] = dk[1]; 793 } 794 } 795 } 796 ierr = PetscFree(a2anew);CHKERRQ(ierr); 797 } 798 799 /* for each row k */ 800 for (k = 0; k<mbs; k++){ 801 802 /*initialize k-th row with elements nonzero in row perm(k) of A */ 803 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 804 ap = aa + jmin*4; 805 for (j = jmin; j < jmax; j++){ 806 vj = perm_ptr[aj[j]]; /* block col. index */ 807 rtmp_ptr = rtmp + vj*4; 808 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 809 } 810 811 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 812 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 813 i = jl[k]; /* first row to be added to k_th row */ 814 815 while (i < k){ 816 nexti = jl[i]; /* next row to be added to k_th row */ 817 818 /* compute multiplier */ 819 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 820 821 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 822 diag = ba + i*4; 823 u = ba + ili*4; 824 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 825 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 826 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 827 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 828 829 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 830 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 831 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 832 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 833 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 834 835 /* update -U(i,k): ba[ili] = uik */ 836 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 837 838 /* add multiple of row i to k-th row ... */ 839 jmin = ili + 1; jmax = bi[i+1]; 840 if (jmin < jmax){ 841 for (j=jmin; j<jmax; j++) { 842 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 843 rtmp_ptr = rtmp + bj[j]*4; 844 u = ba + j*4; 845 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 846 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 847 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 848 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 849 } 850 851 /* ... add i to row list for next nonzero entry */ 852 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 853 j = bj[jmin]; 854 jl[i] = jl[j]; jl[j] = i; /* update jl */ 855 } 856 i = nexti; 857 } 858 859 /* save nonzero entries in k-th row of U ... */ 860 861 /* invert diagonal block */ 862 diag = ba+k*4; 863 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 864 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 865 866 jmin = bi[k]; jmax = bi[k+1]; 867 if (jmin < jmax) { 868 for (j=jmin; j<jmax; j++){ 869 vj = bj[j]; /* block col. index of U */ 870 u = ba + j*4; 871 rtmp_ptr = rtmp + vj*4; 872 for (k1=0; k1<4; k1++){ 873 *u++ = *rtmp_ptr; 874 *rtmp_ptr++ = 0.0; 875 } 876 } 877 878 /* ... add k to row list for first nonzero entry in k-th row */ 879 il[k] = jmin; 880 i = bj[jmin]; 881 jl[k] = jl[i]; jl[i] = k; 882 } 883 } 884 885 ierr = PetscFree(rtmp);CHKERRQ(ierr); 886 ierr = PetscFree(il);CHKERRQ(ierr); 887 ierr = PetscFree(dk);CHKERRQ(ierr); 888 if (a->permute) { 889 ierr = PetscFree(aa);CHKERRQ(ierr); 890 } 891 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 892 C->factor = FACTOR_CHOLESKY; 893 C->assembled = PETSC_TRUE; 894 C->preallocated = PETSC_TRUE; 895 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 896 PetscFunctionReturn(0); 897 } 898 899 /* 900 Version for when blocks are 2 by 2 Using natural ordering 901 */ 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 904 int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B) 905 { 906 Mat C = *B; 907 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 908 int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 909 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 910 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 911 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 912 913 PetscFunctionBegin; 914 915 /* initialization */ 916 /* il and jl record the first nonzero element in each row of the accessing 917 window U(0:k, k:mbs-1). 918 jl: list of rows to be added to uneliminated rows 919 i>= k: jl(i) is the first row to be added to row i 920 i< k: jl(i) is the row following row i in some list of rows 921 jl(i) = mbs indicates the end of a list 922 il(i): points to the first nonzero element in columns k,...,mbs-1 of 923 row i of U */ 924 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 925 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 926 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 927 jl = il + mbs; 928 for (i=0; i<mbs; i++) { 929 jl[i] = mbs; il[0] = 0; 930 } 931 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 932 uik = dk + 4; 933 934 ai = a->i; aj = a->j; aa = a->a; 935 936 /* for each row k */ 937 for (k = 0; k<mbs; k++){ 938 939 /*initialize k-th row with elements nonzero in row k of A */ 940 jmin = ai[k]; jmax = ai[k+1]; 941 ap = aa + jmin*4; 942 for (j = jmin; j < jmax; j++){ 943 vj = aj[j]; /* block col. index */ 944 rtmp_ptr = rtmp + vj*4; 945 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 946 } 947 948 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 949 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 950 i = jl[k]; /* first row to be added to k_th row */ 951 952 while (i < k){ 953 nexti = jl[i]; /* next row to be added to k_th row */ 954 955 /* compute multiplier */ 956 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 957 958 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 959 diag = ba + i*4; 960 u = ba + ili*4; 961 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 962 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 963 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 964 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 965 966 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 967 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 968 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 969 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 970 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 971 972 /* update -U(i,k): ba[ili] = uik */ 973 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 974 975 /* add multiple of row i to k-th row ... */ 976 jmin = ili + 1; jmax = bi[i+1]; 977 if (jmin < jmax){ 978 for (j=jmin; j<jmax; j++) { 979 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 980 rtmp_ptr = rtmp + bj[j]*4; 981 u = ba + j*4; 982 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 983 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 984 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 985 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 986 } 987 988 /* ... add i to row list for next nonzero entry */ 989 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 990 j = bj[jmin]; 991 jl[i] = jl[j]; jl[j] = i; /* update jl */ 992 } 993 i = nexti; 994 } 995 996 /* save nonzero entries in k-th row of U ... */ 997 998 /* invert diagonal block */ 999 diag = ba+k*4; 1000 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 1001 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 1002 1003 jmin = bi[k]; jmax = bi[k+1]; 1004 if (jmin < jmax) { 1005 for (j=jmin; j<jmax; j++){ 1006 vj = bj[j]; /* block col. index of U */ 1007 u = ba + j*4; 1008 rtmp_ptr = rtmp + vj*4; 1009 for (k1=0; k1<4; k1++){ 1010 *u++ = *rtmp_ptr; 1011 *rtmp_ptr++ = 0.0; 1012 } 1013 } 1014 1015 /* ... add k to row list for first nonzero entry in k-th row */ 1016 il[k] = jmin; 1017 i = bj[jmin]; 1018 jl[k] = jl[i]; jl[i] = k; 1019 } 1020 } 1021 1022 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1023 ierr = PetscFree(il);CHKERRQ(ierr); 1024 ierr = PetscFree(dk);CHKERRQ(ierr); 1025 1026 C->factor = FACTOR_CHOLESKY; 1027 C->assembled = PETSC_TRUE; 1028 C->preallocated = PETSC_TRUE; 1029 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 1030 PetscFunctionReturn(0); 1031 } 1032 1033 /* 1034 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 1035 Version for blocks are 1 by 1. 1036 */ 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1" 1039 int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B) 1040 { 1041 Mat C = *B; 1042 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 1043 IS ip = b->row; 1044 int *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 1045 int *ai,*aj,*r; 1046 int k,jmin,jmax,*jl,*il,vj,nexti,ili; 1047 MatScalar *rtmp; 1048 MatScalar *ba = b->a,*aa,ak; 1049 MatScalar dk,uikdi; 1050 1051 PetscFunctionBegin; 1052 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1053 if (!a->permute){ 1054 ai = a->i; aj = a->j; aa = a->a; 1055 } else { 1056 ai = a->inew; aj = a->jnew; 1057 ierr = PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 1058 ierr = PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 1059 ierr = PetscMalloc(ai[mbs]*sizeof(int),&r);CHKERRQ(ierr); 1060 ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 1061 1062 jmin = ai[0]; jmax = ai[mbs]; 1063 for (j=jmin; j<jmax; j++){ 1064 while (r[j] != j){ 1065 k = r[j]; r[j] = r[k]; r[k] = k; 1066 ak = aa[k]; aa[k] = aa[j]; aa[j] = ak; 1067 } 1068 } 1069 ierr = PetscFree(r);CHKERRQ(ierr); 1070 } 1071 1072 /* initialization */ 1073 /* il and jl record the first nonzero element in each row of the accessing 1074 window U(0:k, k:mbs-1). 1075 jl: list of rows to be added to uneliminated rows 1076 i>= k: jl(i) is the first row to be added to row i 1077 i< k: jl(i) is the row following row i in some list of rows 1078 jl(i) = mbs indicates the end of a list 1079 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1080 row i of U */ 1081 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1082 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 1083 jl = il + mbs; 1084 for (i=0; i<mbs; i++) { 1085 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1086 } 1087 1088 /* for each row k */ 1089 for (k = 0; k<mbs; k++){ 1090 1091 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1092 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1093 1094 for (j = jmin; j < jmax; j++){ 1095 vj = rip[aj[j]]; 1096 rtmp[vj] = aa[j]; 1097 } 1098 1099 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1100 dk = rtmp[k]; 1101 i = jl[k]; /* first row to be added to k_th row */ 1102 1103 while (i < k){ 1104 nexti = jl[i]; /* next row to be added to k_th row */ 1105 1106 /* compute multiplier, update D(k) and U(i,k) */ 1107 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1108 uikdi = - ba[ili]*ba[i]; 1109 dk += uikdi*ba[ili]; 1110 ba[ili] = uikdi; /* -U(i,k) */ 1111 1112 /* add multiple of row i to k-th row ... */ 1113 jmin = ili + 1; jmax = bi[i+1]; 1114 if (jmin < jmax){ 1115 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1116 /* ... add i to row list for next nonzero entry */ 1117 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1118 j = bj[jmin]; 1119 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1120 } 1121 i = nexti; 1122 } 1123 1124 /* check for zero pivot and save diagoanl element */ 1125 if (dk == 0.0){ 1126 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 1127 /* 1128 } else if (PetscRealPart(dk) < 0.0){ 1129 SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk); 1130 */ 1131 } 1132 1133 /* save nonzero entries in k-th row of U ... */ 1134 ba[k] = 1.0/dk; 1135 jmin = bi[k]; jmax = bi[k+1]; 1136 if (jmin < jmax) { 1137 for (j=jmin; j<jmax; j++){ 1138 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 1139 } 1140 /* ... add k to row list for first nonzero entry in k-th row */ 1141 il[k] = jmin; 1142 i = bj[jmin]; 1143 jl[k] = jl[i]; jl[i] = k; 1144 } 1145 } 1146 1147 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1148 ierr = PetscFree(il);CHKERRQ(ierr); 1149 if (a->permute){ 1150 ierr = PetscFree(aa);CHKERRQ(ierr); 1151 } 1152 1153 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1154 C->factor = FACTOR_CHOLESKY; 1155 C->assembled = PETSC_TRUE; 1156 C->preallocated = PETSC_TRUE; 1157 PetscLogFlops(b->mbs); 1158 PetscFunctionReturn(0); 1159 } 1160 1161 /* 1162 Version for when blocks are 1 by 1 Using natural ordering 1163 */ 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 1166 int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B) 1167 { 1168 Mat C = *B; 1169 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 1170 int ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 1171 int *ai,*aj; 1172 int k,jmin,jmax,*jl,*il,vj,nexti,ili; 1173 MatScalar *rtmp,*ba = b->a,*aa,dk,uikdi; 1174 1175 PetscFunctionBegin; 1176 1177 /* initialization */ 1178 /* il and jl record the first nonzero element in each row of the accessing 1179 window U(0:k, k:mbs-1). 1180 jl: list of rows to be added to uneliminated rows 1181 i>= k: jl(i) is the first row to be added to row i 1182 i< k: jl(i) is the row following row i in some list of rows 1183 jl(i) = mbs indicates the end of a list 1184 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1185 row i of U */ 1186 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1187 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 1188 jl = il + mbs; 1189 for (i=0; i<mbs; i++) { 1190 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1191 } 1192 1193 ai = a->i; aj = a->j; aa = a->a; 1194 1195 /* for each row k */ 1196 for (k = 0; k<mbs; k++){ 1197 1198 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1199 jmin = ai[k]; jmax = ai[k+1]; 1200 1201 for (j = jmin; j < jmax; j++){ 1202 vj = aj[j]; 1203 rtmp[vj] = aa[j]; 1204 } 1205 1206 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1207 dk = rtmp[k]; 1208 i = jl[k]; /* first row to be added to k_th row */ 1209 1210 while (i < k){ 1211 nexti = jl[i]; /* next row to be added to k_th row */ 1212 1213 /* compute multiplier, update D(k) and U(i,k) */ 1214 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1215 uikdi = - ba[ili]*ba[i]; 1216 dk += uikdi*ba[ili]; 1217 ba[ili] = uikdi; /* -U(i,k) */ 1218 1219 /* add multiple of row i to k-th row ... */ 1220 jmin = ili + 1; jmax = bi[i+1]; 1221 if (jmin < jmax){ 1222 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1223 /* ... add i to row list for next nonzero entry */ 1224 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1225 j = bj[jmin]; 1226 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1227 } 1228 i = nexti; 1229 } 1230 1231 /* check for zero pivot and save diagoanl element */ 1232 if (dk == 0.0){ 1233 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 1234 /* 1235 } else if (PetscRealPart(dk) < 0){ 1236 SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk); 1237 */ 1238 } 1239 1240 /* save nonzero entries in k-th row of U ... */ 1241 ba[k] = 1.0/dk; 1242 jmin = bi[k]; jmax = bi[k+1]; 1243 if (jmin < jmax) { 1244 for (j=jmin; j<jmax; j++){ 1245 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 1246 } 1247 /* ... add k to row list for first nonzero entry in k-th row */ 1248 il[k] = jmin; 1249 i = bj[jmin]; 1250 jl[k] = jl[i]; jl[i] = k; 1251 } 1252 } 1253 1254 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1255 ierr = PetscFree(il);CHKERRQ(ierr); 1256 1257 C->factor = FACTOR_CHOLESKY; 1258 C->assembled = PETSC_TRUE; 1259 C->preallocated = PETSC_TRUE; 1260 PetscLogFlops(b->mbs); 1261 PetscFunctionReturn(0); 1262 } 1263 1264 #undef __FUNCT__ 1265 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace" 1266 int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Mat *B) 1267 { 1268 Mat C = *B; 1269 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data; 1270 int ierr,i,j,mbs = a->mbs; 1271 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 1272 int k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz; 1273 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 1274 1275 PetscFunctionBegin; 1276 /* initialization */ 1277 /* il and jl record the first nonzero element in each row of the accessing 1278 window U(0:k, k:mbs-1). 1279 jl: list of rows to be added to uneliminated rows 1280 i>= k: jl(i) is the first row to be added to row i 1281 i< k: jl(i) is the row following row i in some list of rows 1282 jl(i) = mbs indicates the end of a list 1283 il(i): points to the first nonzero element in U(i,k:mbs-1) 1284 */ 1285 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1286 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 1287 jl = il + mbs; 1288 for (i=0; i<mbs; i++) { 1289 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1290 } 1291 1292 /* for each row k */ 1293 for (k = 0; k<mbs; k++){ 1294 1295 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1296 nz = ai[k+1] - ai[k]; 1297 acol = aj + ai[k]; 1298 aval = aa + ai[k]; 1299 bval = ba + bi[k]; 1300 while (nz -- ){ 1301 rtmp[*acol++] = *aval++; 1302 *bval++ = 0.0; /* for in-place factorization */ 1303 } 1304 1305 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1306 dk = rtmp[k]; 1307 i = jl[k]; /* first row to be added to k_th row */ 1308 1309 while (i < k){ 1310 nexti = jl[i]; /* next row to be added to k_th row */ 1311 /* printf("factnum, k %d, i %d\n", k,i); */ 1312 1313 /* compute multiplier, update D(k) and U(i,k) */ 1314 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1315 uikdi = - ba[ili]*ba[bi[i]]; 1316 dk += uikdi*ba[ili]; 1317 ba[ili] = uikdi; /* -U(i,k) */ 1318 1319 /* add multiple of row i to k-th row ... */ 1320 jmin = ili + 1; 1321 nz = bi[i+1] - jmin; 1322 if (nz > 0){ 1323 bcol = bj + jmin; 1324 bval = ba + jmin; 1325 while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 1326 /* ... add i to row list for next nonzero entry */ 1327 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1328 j = bj[jmin]; 1329 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1330 } 1331 i = nexti; 1332 } 1333 1334 /* check for zero pivot and save diagoanl element */ 1335 if (PetscRealPart(dk) == 0.0){ 1336 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 1337 } else if( PetscRealPart(dk) < 0.0){ 1338 PetscLogInfo((PetscObject)A,"Negative pivot %g in Cholesky factorization\n",1./PetscRealPart(dk)); 1339 } 1340 1341 /* save nonzero entries in k-th row of U ... */ 1342 ba[bi[k]] = 1.0/dk; 1343 jmin = bi[k]+1; 1344 nz = bi[k+1] - jmin; 1345 if (nz){ 1346 bcol = bj + jmin; 1347 bval = ba + jmin; 1348 while (nz--){ 1349 *bval++ = rtmp[*bcol]; 1350 rtmp[*bcol++] = 0.0; 1351 } 1352 /* ... add k to row list for first nonzero entry in k-th row */ 1353 il[k] = jmin; 1354 i = bj[jmin]; 1355 jl[k] = jl[i]; jl[i] = k; 1356 } 1357 } 1358 1359 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1360 ierr = PetscFree(il);CHKERRQ(ierr); 1361 1362 C->factor = FACTOR_CHOLESKY; 1363 C->assembled = PETSC_TRUE; 1364 C->preallocated = PETSC_TRUE; 1365 PetscLogFlops(b->mbs); 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ" 1371 int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,PetscReal f) 1372 { 1373 int ierr; 1374 Mat C; 1375 1376 PetscFunctionBegin; 1377 ierr = MatCholeskyFactorSymbolic(A,perm,f,&C);CHKERRQ(ierr); 1378 ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr); 1379 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1380 PetscFunctionReturn(0); 1381 } 1382 1383 1384