1 /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/seq/baij.h" 4 #include "src/mat/impls/sbaij/seq/sbaij.h" 5 #include "src/vec/vecimpl.h" 6 #include "src/inline/ilu.h" 7 #include "include/petscis.h" 8 9 #if !defined(PETSC_USE_COMPLEX) 10 /* 11 input: 12 F -- numeric factor 13 output: 14 nneg, nzero, npos: matrix inertia 15 */ 16 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatGetInertia_SeqSBAIJ" 19 int MatGetInertia_SeqSBAIJ(Mat F,int *nneig,int *nzero,int *npos) 20 { 21 Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data; 22 PetscScalar *dd = fact_ptr->a; 23 int m = F->m,i; 24 25 PetscFunctionBegin; 26 if (nneig){ 27 *nneig = 0; 28 for (i=0; i<m; i++){ 29 if (PetscRealPart(dd[i]) < 0.0) (*nneig)++; 30 } 31 } 32 if (nzero){ 33 *nzero = 0; 34 for (i=0; i<m; i++){ 35 if (PetscRealPart(dd[i]) == 0.0) (*nzero)++; 36 } 37 } 38 if (npos){ 39 *npos = 0; 40 for (i=0; i<m; i++){ 41 if (PetscRealPart(dd[i]) > 0.0) (*npos)++; 42 } 43 } 44 PetscFunctionReturn(0); 45 } 46 #endif /* !defined(PETSC_USE_COMPLEX) */ 47 48 /* Using Modified Sparse Row (MSR) storage. 49 See page 85, "Iterative Methods ..." by Saad. */ 50 /* 51 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 52 */ 53 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 56 int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B) 57 { 58 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 59 int *rip,ierr,i,mbs = a->mbs,*ai,*aj; 60 int *jutmp,bs = a->bs,bs2=a->bs2; 61 int m,realloc = 0,prow; 62 int *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 63 int *il,ili,nextprow; 64 PetscReal f = info->fill; 65 PetscTruth perm_identity; 66 67 PetscFunctionBegin; 68 /* check whether perm is the identity mapping */ 69 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 70 71 /* -- inplace factorization, i.e., use sbaij for *B -- */ 72 if (perm_identity && bs==1 ){ 73 if (!perm_identity) a->permute = PETSC_TRUE; 74 75 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 76 77 if (perm_identity){ /* without permutation */ 78 ai = a->i; aj = a->j; 79 } else { /* non-trivial permutation */ 80 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 81 ai = a->inew; aj = a->jnew; 82 } 83 84 /* initialization */ 85 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 86 umax = (int)(f*ai[mbs] + 1); 87 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 88 iu[0] = 0; 89 juidx = 0; /* index for ju */ 90 ierr = PetscMalloc((3*mbs+1)*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */ 91 q = jl + mbs; /* linked list for col index of active row */ 92 il = q + mbs; 93 for (i=0; i<mbs; i++){ 94 jl[i] = mbs; 95 q[i] = 0; 96 il[i] = 0; 97 } 98 99 /* for each row k */ 100 for (k=0; k<mbs; k++){ 101 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 102 q[k] = mbs; 103 /* initialize nonzero structure of k-th row to row rip[k] of A */ 104 jmin = ai[rip[k]] +1; /* exclude diag[k] */ 105 jmax = ai[rip[k]+1]; 106 for (j=jmin; j<jmax; j++){ 107 vj = rip[aj[j]]; /* col. value */ 108 if(vj > k){ 109 qm = k; 110 do { 111 m = qm; qm = q[m]; 112 } while(qm < vj); 113 if (qm == vj) { 114 SETERRQ(1," error: duplicate entry in A\n"); 115 } 116 nzk++; 117 q[m] = vj; 118 q[vj] = qm; 119 } /* if(vj > k) */ 120 } /* for (j=jmin; j<jmax; j++) */ 121 122 /* modify nonzero structure of k-th row by computing fill-in 123 for each row i to be merged in */ 124 prow = k; 125 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 126 127 while (prow < k){ 128 nextprow = jl[prow]; 129 130 /* merge row prow into k-th row */ 131 ili = il[prow]; 132 jmin = ili + 1; /* points to 2nd nzero entry in U(prow,k:mbs-1) */ 133 jmax = iu[prow+1]; 134 qm = k; 135 for (j=jmin; j<jmax; j++){ 136 vj = ju[j]; 137 do { 138 m = qm; qm = q[m]; 139 } while (qm < vj); 140 if (qm != vj){ /* a fill */ 141 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 142 } 143 } /* end of for (j=jmin; j<jmax; j++) */ 144 if (jmin < jmax){ 145 il[prow] = jmin; 146 j = ju[jmin]; 147 jl[prow] = jl[j]; jl[j] = prow; /* update jl */ 148 } 149 prow = nextprow; 150 } 151 152 /* update il and jl */ 153 if (nzk > 0){ 154 i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 155 jl[k] = jl[i]; jl[i] = k; 156 il[k] = iu[k] + 1; 157 } 158 iu[k+1] = iu[k] + nzk + 1; /* include diag[k] */ 159 160 /* allocate more space to ju if needed */ 161 if (iu[k+1] > umax) { 162 /* estimate how much additional space we will need */ 163 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 164 /* just double the memory each time */ 165 maxadd = umax; 166 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 167 umax += maxadd; 168 169 /* allocate a longer ju */ 170 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 171 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 172 ierr = PetscFree(ju);CHKERRQ(ierr); 173 ju = jutmp; 174 realloc++; /* count how many times we realloc */ 175 } 176 177 /* save nonzero structure of k-th row in ju */ 178 ju[juidx++] = k; /* diag[k] */ 179 i = k; 180 while (nzk --) { 181 i = q[i]; 182 ju[juidx++] = i; 183 } 184 } 185 186 if (ai[mbs] != 0) { 187 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 188 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 189 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 190 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 191 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 192 } else { 193 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 194 } 195 196 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 197 /* ierr = PetscFree(q);CHKERRQ(ierr); */ 198 ierr = PetscFree(jl);CHKERRQ(ierr); 199 200 /* put together the new matrix */ 201 ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr); 202 /* PetscLogObjectParent(*B,iperm); */ 203 b = (Mat_SeqSBAIJ*)(*B)->data; 204 ierr = PetscFree(b->imax);CHKERRQ(ierr); 205 b->singlemalloc = PETSC_FALSE; 206 /* the next line frees the default space generated by the Create() */ 207 ierr = PetscFree(b->a);CHKERRQ(ierr); 208 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 209 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 210 b->j = ju; 211 b->i = iu; 212 b->diag = 0; 213 b->ilen = 0; 214 b->imax = 0; 215 b->row = perm; 216 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 217 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 218 b->icol = perm; 219 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 220 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 221 /* In b structure: Free imax, ilen, old a, old j. 222 Allocate idnew, solve_work, new a, new j */ 223 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 224 b->s_maxnz = b->s_nz = iu[mbs]; 225 226 (*B)->factor = FACTOR_CHOLESKY; 227 (*B)->info.factor_mallocs = realloc; 228 (*B)->info.fill_ratio_given = f; 229 if (ai[mbs] != 0) { 230 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 231 } else { 232 (*B)->info.fill_ratio_needed = 0.0; 233 } 234 235 236 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 237 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 238 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 239 240 PetscFunctionReturn(0); 241 } 242 /* ----------- end of new code --------------------*/ 243 244 245 if (!perm_identity) a->permute = PETSC_TRUE; 246 247 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 248 249 if (perm_identity){ /* without permutation */ 250 ai = a->i; aj = a->j; 251 } else { /* non-trivial permutation */ 252 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 253 ai = a->inew; aj = a->jnew; 254 } 255 256 /* initialization */ 257 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 258 umax = (int)(f*ai[mbs] + 1); umax += mbs + 1; 259 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 260 iu[0] = mbs+1; 261 juidx = mbs + 1; /* index for ju */ 262 ierr = PetscMalloc(2*mbs*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for pivot row */ 263 q = jl + mbs; /* linked list for col index */ 264 for (i=0; i<mbs; i++){ 265 jl[i] = mbs; 266 q[i] = 0; 267 } 268 269 /* for each row k */ 270 for (k=0; k<mbs; k++){ 271 for (i=0; i<mbs; i++) q[i] = 0; /* to be removed! */ 272 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 273 q[k] = mbs; 274 /* initialize nonzero structure of k-th row to row rip[k] of A */ 275 jmin = ai[rip[k]] +1; /* exclude diag[k] */ 276 jmax = ai[rip[k]+1]; 277 for (j=jmin; j<jmax; j++){ 278 vj = rip[aj[j]]; /* col. value */ 279 if(vj > k){ 280 qm = k; 281 do { 282 m = qm; qm = q[m]; 283 } while(qm < vj); 284 if (qm == vj) { 285 SETERRQ(1," error: duplicate entry in A\n"); 286 } 287 nzk++; 288 q[m] = vj; 289 q[vj] = qm; 290 } /* if(vj > k) */ 291 } /* for (j=jmin; j<jmax; j++) */ 292 293 /* modify nonzero structure of k-th row by computing fill-in 294 for each row i to be merged in */ 295 prow = k; 296 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 297 298 while (prow < k){ 299 /* merge row prow into k-th row */ 300 jmin = iu[prow] + 1; jmax = iu[prow+1]; 301 qm = k; 302 for (j=jmin; j<jmax; j++){ 303 vj = ju[j]; 304 do { 305 m = qm; qm = q[m]; 306 } while (qm < vj); 307 if (qm != vj){ 308 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 309 } 310 } 311 prow = jl[prow]; /* next pivot row */ 312 } 313 314 /* add k to row list for first nonzero element in k-th row */ 315 if (nzk > 0){ 316 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 317 jl[k] = jl[i]; jl[i] = k; 318 } 319 iu[k+1] = iu[k] + nzk; 320 321 /* allocate more space to ju if needed */ 322 if (iu[k+1] > umax) { 323 /* estimate how much additional space we will need */ 324 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 325 /* just double the memory each time */ 326 maxadd = umax; 327 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 328 umax += maxadd; 329 330 /* allocate a longer ju */ 331 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 332 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 333 ierr = PetscFree(ju);CHKERRQ(ierr); 334 ju = jutmp; 335 realloc++; /* count how many times we realloc */ 336 } 337 338 /* save nonzero structure of k-th row in ju */ 339 i=k; 340 while (nzk --) { 341 i = q[i]; 342 ju[juidx++] = i; 343 } 344 } 345 346 if (ai[mbs] != 0) { 347 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 348 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 349 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 350 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 351 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 352 } else { 353 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 354 } 355 356 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 357 /* ierr = PetscFree(q);CHKERRQ(ierr); */ 358 ierr = PetscFree(jl);CHKERRQ(ierr); 359 360 /* put together the new matrix */ 361 ierr = MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);CHKERRQ(ierr); 362 /* PetscLogObjectParent(*B,iperm); */ 363 b = (Mat_SeqSBAIJ*)(*B)->data; 364 ierr = PetscFree(b->imax);CHKERRQ(ierr); 365 b->singlemalloc = PETSC_FALSE; 366 /* the next line frees the default space generated by the Create() */ 367 ierr = PetscFree(b->a);CHKERRQ(ierr); 368 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 369 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 370 b->j = ju; 371 b->i = iu; 372 b->diag = 0; 373 b->ilen = 0; 374 b->imax = 0; 375 b->row = perm; 376 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 377 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 378 b->icol = perm; 379 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 380 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 381 /* In b structure: Free imax, ilen, old a, old j. 382 Allocate idnew, solve_work, new a, new j */ 383 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 384 b->s_maxnz = b->s_nz = iu[mbs]; 385 386 (*B)->factor = FACTOR_CHOLESKY; 387 (*B)->info.factor_mallocs = realloc; 388 (*B)->info.fill_ratio_given = f; 389 if (ai[mbs] != 0) { 390 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 391 } else { 392 (*B)->info.fill_ratio_needed = 0.0; 393 } 394 395 if (perm_identity){ 396 switch (bs) { 397 case 1: 398 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 399 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 400 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 401 break; 402 case 2: 403 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 404 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 405 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 406 break; 407 case 3: 408 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 409 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 410 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 411 break; 412 case 4: 413 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 414 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 415 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 416 break; 417 case 5: 418 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 419 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 420 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 421 break; 422 case 6: 423 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 424 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 425 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 426 break; 427 case 7: 428 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 429 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 430 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 431 break; 432 default: 433 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 434 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 435 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 436 break; 437 } 438 } 439 440 PetscFunctionReturn(0); 441 } 442 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 446 int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B) 447 { 448 Mat C = *B; 449 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 450 IS perm = b->row; 451 int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 452 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 453 int bs=a->bs,bs2 = a->bs2; 454 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 455 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 456 MatScalar *work; 457 int *pivots; 458 459 PetscFunctionBegin; 460 461 /* initialization */ 462 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 463 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 464 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 465 jl = il + mbs; 466 for (i=0; i<mbs; i++) { 467 jl[i] = mbs; il[0] = 0; 468 } 469 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 470 uik = dk + bs2; 471 work = uik + bs2; 472 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 473 474 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 475 476 /* check permutation */ 477 if (!a->permute){ 478 ai = a->i; aj = a->j; aa = a->a; 479 } else { 480 ai = a->inew; aj = a->jnew; 481 ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 482 ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 483 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 484 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 485 486 for (i=0; i<mbs; i++){ 487 jmin = ai[i]; jmax = ai[i+1]; 488 for (j=jmin; j<jmax; j++){ 489 while (a2anew[j] != j){ 490 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 491 for (k1=0; k1<bs2; k1++){ 492 dk[k1] = aa[k*bs2+k1]; 493 aa[k*bs2+k1] = aa[j*bs2+k1]; 494 aa[j*bs2+k1] = dk[k1]; 495 } 496 } 497 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 498 if (i > aj[j]){ 499 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 500 ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 501 for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 502 for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 503 for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 504 } 505 } 506 } 507 } 508 ierr = PetscFree(a2anew);CHKERRQ(ierr); 509 } 510 511 /* for each row k */ 512 for (k = 0; k<mbs; k++){ 513 514 /*initialize k-th row with elements nonzero in row perm(k) of A */ 515 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 516 517 ap = aa + jmin*bs2; 518 for (j = jmin; j < jmax; j++){ 519 vj = perm_ptr[aj[j]]; /* block col. index */ 520 rtmp_ptr = rtmp + vj*bs2; 521 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 522 } 523 524 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 525 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 526 i = jl[k]; /* first row to be added to k_th row */ 527 528 while (i < k){ 529 nexti = jl[i]; /* next row to be added to k_th row */ 530 531 /* compute multiplier */ 532 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 533 534 /* uik = -inv(Di)*U_bar(i,k) */ 535 diag = ba + i*bs2; 536 u = ba + ili*bs2; 537 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 538 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 539 540 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 541 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 542 543 /* update -U(i,k) */ 544 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 545 546 /* add multiple of row i to k-th row ... */ 547 jmin = ili + 1; jmax = bi[i+1]; 548 if (jmin < jmax){ 549 for (j=jmin; j<jmax; j++) { 550 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 551 rtmp_ptr = rtmp + bj[j]*bs2; 552 u = ba + j*bs2; 553 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 554 } 555 556 /* ... add i to row list for next nonzero entry */ 557 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 558 j = bj[jmin]; 559 jl[i] = jl[j]; jl[j] = i; /* update jl */ 560 } 561 i = nexti; 562 } 563 564 /* save nonzero entries in k-th row of U ... */ 565 566 /* invert diagonal block */ 567 diag = ba+k*bs2; 568 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 569 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 570 571 jmin = bi[k]; jmax = bi[k+1]; 572 if (jmin < jmax) { 573 for (j=jmin; j<jmax; j++){ 574 vj = bj[j]; /* block col. index of U */ 575 u = ba + j*bs2; 576 rtmp_ptr = rtmp + vj*bs2; 577 for (k1=0; k1<bs2; k1++){ 578 *u++ = *rtmp_ptr; 579 *rtmp_ptr++ = 0.0; 580 } 581 } 582 583 /* ... add k to row list for first nonzero entry in k-th row */ 584 il[k] = jmin; 585 i = bj[jmin]; 586 jl[k] = jl[i]; jl[i] = k; 587 } 588 } 589 590 ierr = PetscFree(rtmp);CHKERRQ(ierr); 591 ierr = PetscFree(il);CHKERRQ(ierr); 592 ierr = PetscFree(dk);CHKERRQ(ierr); 593 ierr = PetscFree(pivots);CHKERRQ(ierr); 594 if (a->permute){ 595 ierr = PetscFree(aa);CHKERRQ(ierr); 596 } 597 598 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 599 C->factor = FACTOR_CHOLESKY; 600 C->assembled = PETSC_TRUE; 601 C->preallocated = PETSC_TRUE; 602 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 603 PetscFunctionReturn(0); 604 } 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 608 int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B) 609 { 610 Mat C = *B; 611 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 612 int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 613 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 614 int bs=a->bs,bs2 = a->bs2; 615 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 616 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 617 MatScalar *work; 618 int *pivots; 619 620 PetscFunctionBegin; 621 622 /* initialization */ 623 624 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 625 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 626 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 627 jl = il + mbs; 628 for (i=0; i<mbs; i++) { 629 jl[i] = mbs; il[0] = 0; 630 } 631 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 632 uik = dk + bs2; 633 work = uik + bs2; 634 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 635 636 ai = a->i; aj = a->j; aa = a->a; 637 638 /* for each row k */ 639 for (k = 0; k<mbs; k++){ 640 641 /*initialize k-th row with elements nonzero in row k of A */ 642 jmin = ai[k]; jmax = ai[k+1]; 643 ap = aa + jmin*bs2; 644 for (j = jmin; j < jmax; j++){ 645 vj = aj[j]; /* block col. index */ 646 rtmp_ptr = rtmp + vj*bs2; 647 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 648 } 649 650 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 651 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 652 i = jl[k]; /* first row to be added to k_th row */ 653 654 while (i < k){ 655 nexti = jl[i]; /* next row to be added to k_th row */ 656 657 /* compute multiplier */ 658 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 659 660 /* uik = -inv(Di)*U_bar(i,k) */ 661 diag = ba + i*bs2; 662 u = ba + ili*bs2; 663 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 664 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 665 666 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 667 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 668 669 /* update -U(i,k) */ 670 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 671 672 /* add multiple of row i to k-th row ... */ 673 jmin = ili + 1; jmax = bi[i+1]; 674 if (jmin < jmax){ 675 for (j=jmin; j<jmax; j++) { 676 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 677 rtmp_ptr = rtmp + bj[j]*bs2; 678 u = ba + j*bs2; 679 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 680 } 681 682 /* ... add i to row list for next nonzero entry */ 683 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 684 j = bj[jmin]; 685 jl[i] = jl[j]; jl[j] = i; /* update jl */ 686 } 687 i = nexti; 688 } 689 690 /* save nonzero entries in k-th row of U ... */ 691 692 /* invert diagonal block */ 693 diag = ba+k*bs2; 694 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 695 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 696 697 jmin = bi[k]; jmax = bi[k+1]; 698 if (jmin < jmax) { 699 for (j=jmin; j<jmax; j++){ 700 vj = bj[j]; /* block col. index of U */ 701 u = ba + j*bs2; 702 rtmp_ptr = rtmp + vj*bs2; 703 for (k1=0; k1<bs2; k1++){ 704 *u++ = *rtmp_ptr; 705 *rtmp_ptr++ = 0.0; 706 } 707 } 708 709 /* ... add k to row list for first nonzero entry in k-th row */ 710 il[k] = jmin; 711 i = bj[jmin]; 712 jl[k] = jl[i]; jl[i] = k; 713 } 714 } 715 716 ierr = PetscFree(rtmp);CHKERRQ(ierr); 717 ierr = PetscFree(il);CHKERRQ(ierr); 718 ierr = PetscFree(dk);CHKERRQ(ierr); 719 ierr = PetscFree(pivots);CHKERRQ(ierr); 720 721 C->factor = FACTOR_CHOLESKY; 722 C->assembled = PETSC_TRUE; 723 C->preallocated = PETSC_TRUE; 724 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 725 PetscFunctionReturn(0); 726 } 727 728 /* 729 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 730 Version for blocks 2 by 2. 731 */ 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 734 int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B) 735 { 736 Mat C = *B; 737 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 738 IS perm = b->row; 739 int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 740 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 741 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 742 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 743 744 PetscFunctionBegin; 745 746 /* initialization */ 747 /* il and jl record the first nonzero element in each row of the accessing 748 window U(0:k, k:mbs-1). 749 jl: list of rows to be added to uneliminated rows 750 i>= k: jl(i) is the first row to be added to row i 751 i< k: jl(i) is the row following row i in some list of rows 752 jl(i) = mbs indicates the end of a list 753 il(i): points to the first nonzero element in columns k,...,mbs-1 of 754 row i of U */ 755 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 756 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 757 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 758 jl = il + mbs; 759 for (i=0; i<mbs; i++) { 760 jl[i] = mbs; il[0] = 0; 761 } 762 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 763 uik = dk + 4; 764 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 765 766 /* check permutation */ 767 if (!a->permute){ 768 ai = a->i; aj = a->j; aa = a->a; 769 } else { 770 ai = a->inew; aj = a->jnew; 771 ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 772 ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 773 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 774 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 775 776 for (i=0; i<mbs; i++){ 777 jmin = ai[i]; jmax = ai[i+1]; 778 for (j=jmin; j<jmax; j++){ 779 while (a2anew[j] != j){ 780 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 781 for (k1=0; k1<4; k1++){ 782 dk[k1] = aa[k*4+k1]; 783 aa[k*4+k1] = aa[j*4+k1]; 784 aa[j*4+k1] = dk[k1]; 785 } 786 } 787 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 788 if (i > aj[j]){ 789 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 790 ap = aa + j*4; /* ptr to the beginning of the block */ 791 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 792 ap[1] = ap[2]; 793 ap[2] = dk[1]; 794 } 795 } 796 } 797 ierr = PetscFree(a2anew);CHKERRQ(ierr); 798 } 799 800 /* for each row k */ 801 for (k = 0; k<mbs; k++){ 802 803 /*initialize k-th row with elements nonzero in row perm(k) of A */ 804 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 805 ap = aa + jmin*4; 806 for (j = jmin; j < jmax; j++){ 807 vj = perm_ptr[aj[j]]; /* block col. index */ 808 rtmp_ptr = rtmp + vj*4; 809 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 810 } 811 812 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 813 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 814 i = jl[k]; /* first row to be added to k_th row */ 815 816 while (i < k){ 817 nexti = jl[i]; /* next row to be added to k_th row */ 818 819 /* compute multiplier */ 820 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 821 822 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 823 diag = ba + i*4; 824 u = ba + ili*4; 825 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 826 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 827 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 828 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 829 830 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 831 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 832 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 833 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 834 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 835 836 /* update -U(i,k): ba[ili] = uik */ 837 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 838 839 /* add multiple of row i to k-th row ... */ 840 jmin = ili + 1; jmax = bi[i+1]; 841 if (jmin < jmax){ 842 for (j=jmin; j<jmax; j++) { 843 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 844 rtmp_ptr = rtmp + bj[j]*4; 845 u = ba + j*4; 846 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 847 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 848 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 849 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 850 } 851 852 /* ... add i to row list for next nonzero entry */ 853 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 854 j = bj[jmin]; 855 jl[i] = jl[j]; jl[j] = i; /* update jl */ 856 } 857 i = nexti; 858 } 859 860 /* save nonzero entries in k-th row of U ... */ 861 862 /* invert diagonal block */ 863 diag = ba+k*4; 864 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 865 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 866 867 jmin = bi[k]; jmax = bi[k+1]; 868 if (jmin < jmax) { 869 for (j=jmin; j<jmax; j++){ 870 vj = bj[j]; /* block col. index of U */ 871 u = ba + j*4; 872 rtmp_ptr = rtmp + vj*4; 873 for (k1=0; k1<4; k1++){ 874 *u++ = *rtmp_ptr; 875 *rtmp_ptr++ = 0.0; 876 } 877 } 878 879 /* ... add k to row list for first nonzero entry in k-th row */ 880 il[k] = jmin; 881 i = bj[jmin]; 882 jl[k] = jl[i]; jl[i] = k; 883 } 884 } 885 886 ierr = PetscFree(rtmp);CHKERRQ(ierr); 887 ierr = PetscFree(il);CHKERRQ(ierr); 888 ierr = PetscFree(dk);CHKERRQ(ierr); 889 if (a->permute) { 890 ierr = PetscFree(aa);CHKERRQ(ierr); 891 } 892 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 893 C->factor = FACTOR_CHOLESKY; 894 C->assembled = PETSC_TRUE; 895 C->preallocated = PETSC_TRUE; 896 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 897 PetscFunctionReturn(0); 898 } 899 900 /* 901 Version for when blocks are 2 by 2 Using natural ordering 902 */ 903 #undef __FUNCT__ 904 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 905 int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B) 906 { 907 Mat C = *B; 908 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 909 int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 910 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 911 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 912 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 913 914 PetscFunctionBegin; 915 916 /* initialization */ 917 /* il and jl record the first nonzero element in each row of the accessing 918 window U(0:k, k:mbs-1). 919 jl: list of rows to be added to uneliminated rows 920 i>= k: jl(i) is the first row to be added to row i 921 i< k: jl(i) is the row following row i in some list of rows 922 jl(i) = mbs indicates the end of a list 923 il(i): points to the first nonzero element in columns k,...,mbs-1 of 924 row i of U */ 925 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 926 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 927 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 928 jl = il + mbs; 929 for (i=0; i<mbs; i++) { 930 jl[i] = mbs; il[0] = 0; 931 } 932 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 933 uik = dk + 4; 934 935 ai = a->i; aj = a->j; aa = a->a; 936 937 /* for each row k */ 938 for (k = 0; k<mbs; k++){ 939 940 /*initialize k-th row with elements nonzero in row k of A */ 941 jmin = ai[k]; jmax = ai[k+1]; 942 ap = aa + jmin*4; 943 for (j = jmin; j < jmax; j++){ 944 vj = aj[j]; /* block col. index */ 945 rtmp_ptr = rtmp + vj*4; 946 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 947 } 948 949 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 950 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 951 i = jl[k]; /* first row to be added to k_th row */ 952 953 while (i < k){ 954 nexti = jl[i]; /* next row to be added to k_th row */ 955 956 /* compute multiplier */ 957 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 958 959 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 960 diag = ba + i*4; 961 u = ba + ili*4; 962 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 963 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 964 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 965 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 966 967 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 968 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 969 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 970 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 971 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 972 973 /* update -U(i,k): ba[ili] = uik */ 974 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 975 976 /* add multiple of row i to k-th row ... */ 977 jmin = ili + 1; jmax = bi[i+1]; 978 if (jmin < jmax){ 979 for (j=jmin; j<jmax; j++) { 980 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 981 rtmp_ptr = rtmp + bj[j]*4; 982 u = ba + j*4; 983 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 984 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 985 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 986 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 987 } 988 989 /* ... add i to row list for next nonzero entry */ 990 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 991 j = bj[jmin]; 992 jl[i] = jl[j]; jl[j] = i; /* update jl */ 993 } 994 i = nexti; 995 } 996 997 /* save nonzero entries in k-th row of U ... */ 998 999 /* invert diagonal block */ 1000 diag = ba+k*4; 1001 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 1002 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 1003 1004 jmin = bi[k]; jmax = bi[k+1]; 1005 if (jmin < jmax) { 1006 for (j=jmin; j<jmax; j++){ 1007 vj = bj[j]; /* block col. index of U */ 1008 u = ba + j*4; 1009 rtmp_ptr = rtmp + vj*4; 1010 for (k1=0; k1<4; k1++){ 1011 *u++ = *rtmp_ptr; 1012 *rtmp_ptr++ = 0.0; 1013 } 1014 } 1015 1016 /* ... add k to row list for first nonzero entry in k-th row */ 1017 il[k] = jmin; 1018 i = bj[jmin]; 1019 jl[k] = jl[i]; jl[i] = k; 1020 } 1021 } 1022 1023 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1024 ierr = PetscFree(il);CHKERRQ(ierr); 1025 ierr = PetscFree(dk);CHKERRQ(ierr); 1026 1027 C->factor = FACTOR_CHOLESKY; 1028 C->assembled = PETSC_TRUE; 1029 C->preallocated = PETSC_TRUE; 1030 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 1031 PetscFunctionReturn(0); 1032 } 1033 1034 /* 1035 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 1036 Version for blocks are 1 by 1. 1037 */ 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1" 1040 int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B) 1041 { 1042 Mat C = *B; 1043 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 1044 IS ip = b->row; 1045 int *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 1046 int *ai,*aj,*r; 1047 int k,jmin,jmax,*jl,*il,vj,nexti,ili; 1048 MatScalar *rtmp; 1049 MatScalar *ba = b->a,*aa,ak; 1050 MatScalar dk,uikdi; 1051 1052 PetscFunctionBegin; 1053 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1054 if (!a->permute){ 1055 ai = a->i; aj = a->j; aa = a->a; 1056 } else { 1057 ai = a->inew; aj = a->jnew; 1058 ierr = PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 1059 ierr = PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 1060 ierr = PetscMalloc(ai[mbs]*sizeof(int),&r);CHKERRQ(ierr); 1061 ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 1062 1063 jmin = ai[0]; jmax = ai[mbs]; 1064 for (j=jmin; j<jmax; j++){ 1065 while (r[j] != j){ 1066 k = r[j]; r[j] = r[k]; r[k] = k; 1067 ak = aa[k]; aa[k] = aa[j]; aa[j] = ak; 1068 } 1069 } 1070 ierr = PetscFree(r);CHKERRQ(ierr); 1071 } 1072 1073 /* initialization */ 1074 /* il and jl record the first nonzero element in each row of the accessing 1075 window U(0:k, k:mbs-1). 1076 jl: list of rows to be added to uneliminated rows 1077 i>= k: jl(i) is the first row to be added to row i 1078 i< k: jl(i) is the row following row i in some list of rows 1079 jl(i) = mbs indicates the end of a list 1080 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1081 row i of U */ 1082 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1083 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 1084 jl = il + mbs; 1085 for (i=0; i<mbs; i++) { 1086 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1087 } 1088 1089 /* for each row k */ 1090 for (k = 0; k<mbs; k++){ 1091 1092 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1093 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1094 1095 for (j = jmin; j < jmax; j++){ 1096 vj = rip[aj[j]]; 1097 rtmp[vj] = aa[j]; 1098 } 1099 1100 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1101 dk = rtmp[k]; 1102 i = jl[k]; /* first row to be added to k_th row */ 1103 1104 while (i < k){ 1105 nexti = jl[i]; /* next row to be added to k_th row */ 1106 1107 /* compute multiplier, update D(k) and U(i,k) */ 1108 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1109 uikdi = - ba[ili]*ba[i]; 1110 dk += uikdi*ba[ili]; 1111 ba[ili] = uikdi; /* -U(i,k) */ 1112 1113 /* add multiple of row i to k-th row ... */ 1114 jmin = ili + 1; jmax = bi[i+1]; 1115 if (jmin < jmax){ 1116 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1117 /* ... add i to row list for next nonzero entry */ 1118 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1119 j = bj[jmin]; 1120 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1121 } 1122 i = nexti; 1123 } 1124 1125 /* check for zero pivot and save diagoanl element */ 1126 if (dk == 0.0){ 1127 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 1128 /* 1129 } else if (PetscRealPart(dk) < 0.0){ 1130 SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk); 1131 */ 1132 } 1133 1134 /* save nonzero entries in k-th row of U ... */ 1135 ba[k] = 1.0/dk; 1136 jmin = bi[k]; jmax = bi[k+1]; 1137 if (jmin < jmax) { 1138 for (j=jmin; j<jmax; j++){ 1139 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 1140 } 1141 /* ... add k to row list for first nonzero entry in k-th row */ 1142 il[k] = jmin; 1143 i = bj[jmin]; 1144 jl[k] = jl[i]; jl[i] = k; 1145 } 1146 } 1147 1148 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1149 ierr = PetscFree(il);CHKERRQ(ierr); 1150 if (a->permute){ 1151 ierr = PetscFree(aa);CHKERRQ(ierr); 1152 } 1153 1154 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1155 C->factor = FACTOR_CHOLESKY; 1156 C->assembled = PETSC_TRUE; 1157 C->preallocated = PETSC_TRUE; 1158 PetscLogFlops(b->mbs); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 /* 1163 Version for when blocks are 1 by 1 Using natural ordering 1164 */ 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 1167 int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B) 1168 { 1169 Mat C = *B; 1170 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data; 1171 int ierr,i,j,mbs = a->mbs; 1172 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 1173 int k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0; 1174 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 1175 PetscReal damping=b->factor_damping, zeropivot=b->factor_zeropivot; 1176 PetscTruth damp; 1177 1178 PetscFunctionBegin; 1179 /* initialization */ 1180 /* il and jl record the first nonzero element in each row of the accessing 1181 window U(0:k, k:mbs-1). 1182 jl: list of rows to be added to uneliminated rows 1183 i>= k: jl(i) is the first row to be added to row i 1184 i< k: jl(i) is the row following row i in some list of rows 1185 jl(i) = mbs indicates the end of a list 1186 il(i): points to the first nonzero element in U(i,k:mbs-1) 1187 */ 1188 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1189 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 1190 jl = il + mbs; 1191 1192 do { 1193 damp = PETSC_FALSE; 1194 for (i=0; i<mbs; i++) { 1195 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1196 } 1197 1198 for (k = 0; k<mbs; k++){ /* row k */ 1199 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1200 nz = ai[k+1] - ai[k]; 1201 acol = aj + ai[k]; 1202 aval = aa + ai[k]; 1203 bval = ba + bi[k]; 1204 while (nz -- ){ 1205 rtmp[*acol++] = *aval++; 1206 *bval++ = 0.0; /* for in-place factorization */ 1207 } 1208 /* damp the diagonal of the matrix */ 1209 if (ndamp) rtmp[k] += damping; 1210 1211 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1212 dk = rtmp[k]; 1213 i = jl[k]; /* first row to be added to k_th row */ 1214 1215 while (i < k){ 1216 nexti = jl[i]; /* next row to be added to k_th row */ 1217 1218 /* compute multiplier, update D(k) and U(i,k) */ 1219 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1220 uikdi = - ba[ili]*ba[bi[i]]; 1221 dk += uikdi*ba[ili]; 1222 ba[ili] = uikdi; /* -U(i,k) */ 1223 1224 /* add multiple of row i to k-th row ... */ 1225 jmin = ili + 1; 1226 nz = bi[i+1] - jmin; 1227 if (nz > 0){ 1228 bcol = bj + jmin; 1229 bval = ba + jmin; 1230 while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 1231 /* ... add i to row list for next nonzero entry */ 1232 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1233 j = bj[jmin]; 1234 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1235 } 1236 i = nexti; 1237 } 1238 1239 /* check for zero pivot and save diagoanl element */ 1240 if (dk < zeropivot){ 1241 if (damping > 0.0) { 1242 if (ndamp) damping *= 2.0; 1243 damp = PETSC_TRUE; 1244 ndamp++; 1245 break; 1246 } else if (PetscAbsScalar(dk) < zeropivot){ 1247 SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g",k,dk,zeropivot); 1248 } else { 1249 PetscLogInfo((PetscObject)A,"Negative pivot %g in row %d of Cholesky factorization\n",1./PetscRealPart(dk),k); 1250 } 1251 } 1252 1253 /* save nonzero entries in k-th row of U ... */ 1254 ba[bi[k]] = 1.0/dk; 1255 jmin = bi[k]+1; 1256 nz = bi[k+1] - jmin; 1257 if (nz){ 1258 bcol = bj + jmin; 1259 bval = ba + jmin; 1260 while (nz--){ 1261 *bval++ = rtmp[*bcol]; 1262 rtmp[*bcol++] = 0.0; 1263 } 1264 /* ... add k to row list for first nonzero entry in k-th row */ 1265 il[k] = jmin; 1266 i = bj[jmin]; 1267 jl[k] = jl[i]; jl[i] = k; 1268 } 1269 } /* end of for (k = 0; k<mbs; k++) */ 1270 } while (damp); 1271 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1272 ierr = PetscFree(il);CHKERRQ(ierr); 1273 1274 C->factor = FACTOR_CHOLESKY; 1275 C->assembled = PETSC_TRUE; 1276 C->preallocated = PETSC_TRUE; 1277 PetscLogFlops(b->mbs); 1278 if (ndamp) { 1279 PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqSBAIJ_1_NaturalOrdering: number of damping tries %d damping value %g\n",ndamp,damping); 1280 } 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ" 1286 int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info) 1287 { 1288 int ierr; 1289 Mat C; 1290 1291 PetscFunctionBegin; 1292 ierr = MatCholeskyFactorSymbolic(A,perm,info,&C);CHKERRQ(ierr); 1293 ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr); 1294 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 1299