1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/seq/baij.h" 4 #include "../src/mat/impls/sbaij/seq/sbaij.h" 5 #include "../src/mat/blockinvert.h" 6 #include "petscis.h" 7 8 #if !defined(PETSC_USE_COMPLEX) 9 /* 10 input: 11 F -- numeric factor 12 output: 13 nneg, nzero, npos: matrix inertia 14 */ 15 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatGetInertia_SeqSBAIJ" 18 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,PetscInt *nneig,PetscInt *nzero,PetscInt *npos) 19 { 20 Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data; 21 MatScalar *dd = fact_ptr->a; 22 PetscInt mbs=fact_ptr->mbs,bs=F->rmap->bs,i,nneig_tmp,npos_tmp,*fi = fact_ptr->i; 23 24 PetscFunctionBegin; 25 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"No support for bs: %D >1 yet",bs); 26 nneig_tmp = 0; npos_tmp = 0; 27 for (i=0; i<mbs; i++){ 28 if (PetscRealPart(dd[*fi]) > 0.0){ 29 npos_tmp++; 30 } else if (PetscRealPart(dd[*fi]) < 0.0){ 31 nneig_tmp++; 32 } 33 fi++; 34 } 35 if (nneig) *nneig = nneig_tmp; 36 if (npos) *npos = npos_tmp; 37 if (nzero) *nzero = mbs - nneig_tmp - npos_tmp; 38 39 PetscFunctionReturn(0); 40 } 41 #endif /* !defined(PETSC_USE_COMPLEX) */ 42 43 /* 44 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 45 Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad. 46 */ 47 #undef __FUNCT__ 48 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ_MSR" 49 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F,Mat A,IS perm,const MatFactorInfo *info) 50 { 51 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 52 PetscErrorCode ierr; 53 const PetscInt *rip,*ai,*aj; 54 PetscInt i,mbs = a->mbs,*jutmp,bs = A->rmap->bs,bs2=a->bs2; 55 PetscInt m,reallocs = 0,prow; 56 PetscInt *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 57 PetscReal f = info->fill; 58 PetscTruth perm_identity; 59 60 PetscFunctionBegin; 61 /* check whether perm is the identity mapping */ 62 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 63 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 64 65 if (perm_identity){ /* without permutation */ 66 a->permute = PETSC_FALSE; 67 ai = a->i; aj = a->j; 68 } else { /* non-trivial permutation */ 69 a->permute = PETSC_TRUE; 70 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 71 ai = a->inew; aj = a->jnew; 72 } 73 74 /* initialization */ 75 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 76 umax = (PetscInt)(f*ai[mbs] + 1); umax += mbs + 1; 77 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 78 iu[0] = mbs+1; 79 juidx = mbs + 1; /* index for ju */ 80 /* jl linked list for pivot row -- linked list for col index */ 81 ierr = PetscMalloc2(mbs,PetscInt,&jl,mbs,PetscInt,&q);CHKERRQ(ierr); 82 for (i=0; i<mbs; i++){ 83 jl[i] = mbs; 84 q[i] = 0; 85 } 86 87 /* for each row k */ 88 for (k=0; k<mbs; k++){ 89 for (i=0; i<mbs; i++) q[i] = 0; /* to be removed! */ 90 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 91 q[k] = mbs; 92 /* initialize nonzero structure of k-th row to row rip[k] of A */ 93 jmin = ai[rip[k]] +1; /* exclude diag[k] */ 94 jmax = ai[rip[k]+1]; 95 for (j=jmin; j<jmax; j++){ 96 vj = rip[aj[j]]; /* col. value */ 97 if(vj > k){ 98 qm = k; 99 do { 100 m = qm; qm = q[m]; 101 } while(qm < vj); 102 if (qm == vj) { 103 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 104 } 105 nzk++; 106 q[m] = vj; 107 q[vj] = qm; 108 } /* if(vj > k) */ 109 } /* for (j=jmin; j<jmax; j++) */ 110 111 /* modify nonzero structure of k-th row by computing fill-in 112 for each row i to be merged in */ 113 prow = k; 114 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 115 116 while (prow < k){ 117 /* merge row prow into k-th row */ 118 jmin = iu[prow] + 1; jmax = iu[prow+1]; 119 qm = k; 120 for (j=jmin; j<jmax; j++){ 121 vj = ju[j]; 122 do { 123 m = qm; qm = q[m]; 124 } while (qm < vj); 125 if (qm != vj){ 126 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 127 } 128 } 129 prow = jl[prow]; /* next pivot row */ 130 } 131 132 /* add k to row list for first nonzero element in k-th row */ 133 if (nzk > 0){ 134 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 135 jl[k] = jl[i]; jl[i] = k; 136 } 137 iu[k+1] = iu[k] + nzk; 138 139 /* allocate more space to ju if needed */ 140 if (iu[k+1] > umax) { 141 /* estimate how much additional space we will need */ 142 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 143 /* just double the memory each time */ 144 maxadd = umax; 145 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 146 umax += maxadd; 147 148 /* allocate a longer ju */ 149 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 150 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 151 ierr = PetscFree(ju);CHKERRQ(ierr); 152 ju = jutmp; 153 reallocs++; /* count how many times we realloc */ 154 } 155 156 /* save nonzero structure of k-th row in ju */ 157 i=k; 158 while (nzk --) { 159 i = q[i]; 160 ju[juidx++] = i; 161 } 162 } 163 164 #if defined(PETSC_USE_INFO) 165 if (ai[mbs] != 0) { 166 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 167 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 168 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 169 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 170 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 171 } else { 172 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 173 } 174 #endif 175 176 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 177 ierr = PetscFree2(jl,q);CHKERRQ(ierr); 178 179 /* put together the new matrix */ 180 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(F,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 181 182 /* ierr = PetscLogObjectParent(B,iperm);CHKERRQ(ierr); */ 183 b = (Mat_SeqSBAIJ*)(F)->data; 184 b->singlemalloc = PETSC_FALSE; 185 b->free_a = PETSC_TRUE; 186 b->free_ij = PETSC_TRUE; 187 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 188 b->j = ju; 189 b->i = iu; 190 b->diag = 0; 191 b->ilen = 0; 192 b->imax = 0; 193 b->row = perm; 194 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 195 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 196 b->icol = perm; 197 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 198 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 199 /* In b structure: Free imax, ilen, old a, old j. 200 Allocate idnew, solve_work, new a, new j */ 201 ierr = PetscLogObjectMemory(F,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 202 b->maxnz = b->nz = iu[mbs]; 203 204 (F)->info.factor_mallocs = reallocs; 205 (F)->info.fill_ratio_given = f; 206 if (ai[mbs] != 0) { 207 (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 208 } else { 209 (F)->info.fill_ratio_needed = 0.0; 210 } 211 ierr = MatSeqSBAIJSetNumericFactorization_inplace(F,perm_identity);CHKERRQ(ierr); 212 PetscFunctionReturn(0); 213 } 214 /* 215 Symbolic U^T*D*U factorization for SBAIJ format. 216 */ 217 #include "petscbt.h" 218 #include "../src/mat/utils/freespace.h" 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 221 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 222 { 223 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 224 Mat_SeqSBAIJ *b; 225 PetscErrorCode ierr; 226 PetscTruth perm_identity,missing; 227 PetscReal fill = info->fill; 228 const PetscInt *rip,*ai,*aj; 229 PetscInt i,mbs=a->mbs,bs=A->rmap->bs,reallocs=0,prow,d; 230 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 231 PetscInt nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr; 232 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 233 PetscBT lnkbt; 234 PetscTruth olddatastruct=PETSC_FALSE; 235 236 PetscFunctionBegin; 237 ierr = PetscOptionsGetTruth(PETSC_NULL,"-cholesky_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 238 if (olddatastruct || bs>1 ){ 239 ierr = MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact,A,perm,info);CHKERRQ(ierr); 240 PetscFunctionReturn(0); 241 } 242 SETERRQ(PETSC_ERR_SUP," Not done yet"); 243 244 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 245 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 246 247 /* 248 This code originally uses Modified Sparse Row (MSR) storage 249 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise! 250 Then it is rewritten so the factor B takes seqsbaij format. However the associated 251 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity, 252 thus the original code in MSR format is still used for these cases. 253 The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever 254 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 255 */ 256 if (bs > 1){ 257 ierr = MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info);CHKERRQ(ierr); 258 PetscFunctionReturn(0); 259 } 260 261 /* check whether perm is the identity mapping */ 262 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 263 264 if (perm_identity){ 265 a->permute = PETSC_FALSE; 266 ai = a->i; aj = a->j; 267 } else { 268 SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format"); 269 /* There are bugs for reordeing. Needs further work. 270 MatReordering for sbaij cannot be efficient. User should use aij formt! */ 271 a->permute = PETSC_TRUE; 272 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 273 ai = a->inew; aj = a->jnew; 274 } 275 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 276 277 /* initialization */ 278 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 279 ui[0] = 0; 280 281 /* jl: linked list for storing indices of the pivot rows 282 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 283 ierr = PetscMalloc4(mbs,PetscInt*,&ui_ptr,mbs,PetscInt,&il,mbs,PetscInt,&jl,mbs,PetscInt,&cols);CHKERRQ(ierr); 284 for (i=0; i<mbs; i++){ 285 jl[i] = mbs; il[i] = 0; 286 } 287 288 /* create and initialize a linked list for storing column indices of the active row k */ 289 nlnk = mbs + 1; 290 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 291 292 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 293 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 294 current_space = free_space; 295 296 for (k=0; k<mbs; k++){ /* for each active row k */ 297 /* initialize lnk by the column indices of row rip[k] of A */ 298 nzk = 0; 299 ncols = ai[rip[k]+1] - ai[rip[k]]; 300 for (j=0; j<ncols; j++){ 301 i = *(aj + ai[rip[k]] + j); 302 cols[j] = rip[i]; 303 } 304 ierr = PetscLLAdd(ncols,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 305 nzk += nlnk; 306 307 /* update lnk by computing fill-in for each pivot row to be merged in */ 308 prow = jl[k]; /* 1st pivot row */ 309 310 while (prow < k){ 311 nextprow = jl[prow]; 312 /* merge prow into k-th row */ 313 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 314 jmax = ui[prow+1]; 315 ncols = jmax-jmin; 316 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 317 ierr = PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 318 nzk += nlnk; 319 320 /* update il and jl for prow */ 321 if (jmin < jmax){ 322 il[prow] = jmin; 323 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 324 } 325 prow = nextprow; 326 } 327 328 /* if free space is not available, make more free space */ 329 if (current_space->local_remaining<nzk) { 330 i = mbs - k + 1; /* num of unfactored rows */ 331 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 332 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 333 reallocs++; 334 } 335 336 /* copy data into free space, then initialize lnk */ 337 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 338 339 /* add the k-th row into il and jl */ 340 if (nzk-1 > 0){ 341 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 342 jl[k] = jl[i]; jl[i] = k; 343 il[k] = ui[k] + 1; 344 } 345 ui_ptr[k] = current_space->array; 346 current_space->array += nzk; 347 current_space->local_used += nzk; 348 current_space->local_remaining -= nzk; 349 350 ui[k+1] = ui[k] + nzk; 351 } 352 353 #if defined(PETSC_USE_INFO) 354 if (ai[mbs] != 0) { 355 PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 356 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 357 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 358 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 359 } else { 360 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 361 } 362 #endif 363 364 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 365 ierr = PetscFree4(ui_ptr,il,jl,cols);CHKERRQ(ierr); 366 367 /* destroy list of free space and other temporary array(s) */ 368 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 369 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 370 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 371 372 /* put together the new matrix in MATSEQSBAIJ format */ 373 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 374 375 b = (Mat_SeqSBAIJ*)(fact)->data; 376 b->singlemalloc = PETSC_FALSE; 377 b->free_a = PETSC_TRUE; 378 b->free_ij = PETSC_TRUE; 379 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 380 b->j = uj; 381 b->i = ui; 382 b->diag = 0; 383 b->ilen = 0; 384 b->imax = 0; 385 b->row = perm; 386 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 387 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 388 b->icol = perm; 389 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 390 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 391 ierr = PetscLogObjectMemory(fact,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 392 b->maxnz = b->nz = ui[mbs]; 393 394 (fact)->info.factor_mallocs = reallocs; 395 (fact)->info.fill_ratio_given = fill; 396 if (ai[mbs] != 0) { 397 (fact)->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 398 } else { 399 (fact)->info.fill_ratio_needed = 0.0; 400 } 401 ierr = MatSeqSBAIJSetNumericFactorization_inplace(fact,perm_identity);CHKERRQ(ierr); 402 PetscFunctionReturn(0); 403 } 404 405 #undef __FUNCT__ 406 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ_inplace" 407 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 408 { 409 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 410 Mat_SeqSBAIJ *b; 411 PetscErrorCode ierr; 412 PetscTruth perm_identity,missing; 413 PetscReal fill = info->fill; 414 const PetscInt *rip,*ai,*aj; 415 PetscInt i,mbs=a->mbs,bs=A->rmap->bs,reallocs=0,prow,d; 416 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 417 PetscInt nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr; 418 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 419 PetscBT lnkbt; 420 421 PetscFunctionBegin; 422 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 423 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 424 425 /* 426 This code originally uses Modified Sparse Row (MSR) storage 427 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise! 428 Then it is rewritten so the factor B takes seqsbaij format. However the associated 429 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity, 430 thus the original code in MSR format is still used for these cases. 431 The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever 432 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 433 */ 434 if (bs > 1){ 435 ierr = MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info);CHKERRQ(ierr); 436 PetscFunctionReturn(0); 437 } 438 439 /* check whether perm is the identity mapping */ 440 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 441 442 if (perm_identity){ 443 a->permute = PETSC_FALSE; 444 ai = a->i; aj = a->j; 445 } else { 446 SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format"); 447 /* There are bugs for reordeing. Needs further work. 448 MatReordering for sbaij cannot be efficient. User should use aij formt! */ 449 a->permute = PETSC_TRUE; 450 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 451 ai = a->inew; aj = a->jnew; 452 } 453 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 454 455 /* initialization */ 456 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 457 ui[0] = 0; 458 459 /* jl: linked list for storing indices of the pivot rows 460 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 461 ierr = PetscMalloc4(mbs,PetscInt*,&ui_ptr,mbs,PetscInt,&il,mbs,PetscInt,&jl,mbs,PetscInt,&cols);CHKERRQ(ierr); 462 for (i=0; i<mbs; i++){ 463 jl[i] = mbs; il[i] = 0; 464 } 465 466 /* create and initialize a linked list for storing column indices of the active row k */ 467 nlnk = mbs + 1; 468 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 469 470 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 471 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 472 current_space = free_space; 473 474 for (k=0; k<mbs; k++){ /* for each active row k */ 475 /* initialize lnk by the column indices of row rip[k] of A */ 476 nzk = 0; 477 ncols = ai[rip[k]+1] - ai[rip[k]]; 478 for (j=0; j<ncols; j++){ 479 i = *(aj + ai[rip[k]] + j); 480 cols[j] = rip[i]; 481 } 482 ierr = PetscLLAdd(ncols,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 483 nzk += nlnk; 484 485 /* update lnk by computing fill-in for each pivot row to be merged in */ 486 prow = jl[k]; /* 1st pivot row */ 487 488 while (prow < k){ 489 nextprow = jl[prow]; 490 /* merge prow into k-th row */ 491 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 492 jmax = ui[prow+1]; 493 ncols = jmax-jmin; 494 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 495 ierr = PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 496 nzk += nlnk; 497 498 /* update il and jl for prow */ 499 if (jmin < jmax){ 500 il[prow] = jmin; 501 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 502 } 503 prow = nextprow; 504 } 505 506 /* if free space is not available, make more free space */ 507 if (current_space->local_remaining<nzk) { 508 i = mbs - k + 1; /* num of unfactored rows */ 509 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 510 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 511 reallocs++; 512 } 513 514 /* copy data into free space, then initialize lnk */ 515 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 516 517 /* add the k-th row into il and jl */ 518 if (nzk-1 > 0){ 519 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 520 jl[k] = jl[i]; jl[i] = k; 521 il[k] = ui[k] + 1; 522 } 523 ui_ptr[k] = current_space->array; 524 current_space->array += nzk; 525 current_space->local_used += nzk; 526 current_space->local_remaining -= nzk; 527 528 ui[k+1] = ui[k] + nzk; 529 } 530 531 #if defined(PETSC_USE_INFO) 532 if (ai[mbs] != 0) { 533 PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 534 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 535 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 536 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 537 } else { 538 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 539 } 540 #endif 541 542 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 543 ierr = PetscFree4(ui_ptr,il,jl,cols);CHKERRQ(ierr); 544 545 /* destroy list of free space and other temporary array(s) */ 546 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 547 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 548 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 549 550 /* put together the new matrix in MATSEQSBAIJ format */ 551 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 552 553 b = (Mat_SeqSBAIJ*)(fact)->data; 554 b->singlemalloc = PETSC_FALSE; 555 b->free_a = PETSC_TRUE; 556 b->free_ij = PETSC_TRUE; 557 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 558 b->j = uj; 559 b->i = ui; 560 b->diag = 0; 561 b->ilen = 0; 562 b->imax = 0; 563 b->row = perm; 564 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 565 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 566 b->icol = perm; 567 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 568 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 569 ierr = PetscLogObjectMemory(fact,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 570 b->maxnz = b->nz = ui[mbs]; 571 572 (fact)->info.factor_mallocs = reallocs; 573 (fact)->info.fill_ratio_given = fill; 574 if (ai[mbs] != 0) { 575 (fact)->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 576 } else { 577 (fact)->info.fill_ratio_needed = 0.0; 578 } 579 ierr = MatSeqSBAIJSetNumericFactorization_inplace(fact,perm_identity);CHKERRQ(ierr); 580 PetscFunctionReturn(0); 581 } 582 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 585 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C,Mat A,const MatFactorInfo *info) 586 { 587 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 588 IS perm = b->row; 589 PetscErrorCode ierr; 590 const PetscInt *ai,*aj,*perm_ptr,mbs=a->mbs,*bi=b->i,*bj=b->j; 591 PetscInt i,j; 592 PetscInt *a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 593 PetscInt bs=A->rmap->bs,bs2 = a->bs2,bslog = 0; 594 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 595 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 596 MatScalar *work; 597 PetscInt *pivots; 598 599 PetscFunctionBegin; 600 /* initialization */ 601 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 602 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 603 ierr = PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 604 for (i=0; i<mbs; i++) { 605 jl[i] = mbs; il[0] = 0; 606 } 607 ierr = PetscMalloc3(bs2,MatScalar,&dk,bs2,MatScalar,&uik,bs,MatScalar,&work);CHKERRQ(ierr); 608 ierr = PetscMalloc(bs*sizeof(PetscInt),&pivots);CHKERRQ(ierr); 609 610 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 611 612 /* check permutation */ 613 if (!a->permute){ 614 ai = a->i; aj = a->j; aa = a->a; 615 } else { 616 ai = a->inew; aj = a->jnew; 617 ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 618 ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 619 ierr = PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);CHKERRQ(ierr); 620 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr); 621 622 /* flops in while loop */ 623 bslog = 2*bs*bs2; 624 625 for (i=0; i<mbs; i++){ 626 jmin = ai[i]; jmax = ai[i+1]; 627 for (j=jmin; j<jmax; j++){ 628 while (a2anew[j] != j){ 629 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 630 for (k1=0; k1<bs2; k1++){ 631 dk[k1] = aa[k*bs2+k1]; 632 aa[k*bs2+k1] = aa[j*bs2+k1]; 633 aa[j*bs2+k1] = dk[k1]; 634 } 635 } 636 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 637 if (i > aj[j]){ 638 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 639 ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 640 for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 641 for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 642 for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 643 } 644 } 645 } 646 } 647 ierr = PetscFree(a2anew);CHKERRQ(ierr); 648 } 649 650 /* for each row k */ 651 for (k = 0; k<mbs; k++){ 652 653 /*initialize k-th row with elements nonzero in row perm(k) of A */ 654 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 655 656 ap = aa + jmin*bs2; 657 for (j = jmin; j < jmax; j++){ 658 vj = perm_ptr[aj[j]]; /* block col. index */ 659 rtmp_ptr = rtmp + vj*bs2; 660 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 661 } 662 663 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 664 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 665 i = jl[k]; /* first row to be added to k_th row */ 666 667 while (i < k){ 668 nexti = jl[i]; /* next row to be added to k_th row */ 669 670 /* compute multiplier */ 671 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 672 673 /* uik = -inv(Di)*U_bar(i,k) */ 674 diag = ba + i*bs2; 675 u = ba + ili*bs2; 676 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 677 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 678 679 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 680 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 681 ierr = PetscLogFlops(bslog*2.0);CHKERRQ(ierr); 682 683 /* update -U(i,k) */ 684 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 685 686 /* add multiple of row i to k-th row ... */ 687 jmin = ili + 1; jmax = bi[i+1]; 688 if (jmin < jmax){ 689 for (j=jmin; j<jmax; j++) { 690 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 691 rtmp_ptr = rtmp + bj[j]*bs2; 692 u = ba + j*bs2; 693 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 694 } 695 ierr = PetscLogFlops(bslog*(jmax-jmin));CHKERRQ(ierr); 696 697 /* ... add i to row list for next nonzero entry */ 698 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 699 j = bj[jmin]; 700 jl[i] = jl[j]; jl[j] = i; /* update jl */ 701 } 702 i = nexti; 703 } 704 705 /* save nonzero entries in k-th row of U ... */ 706 707 /* invert diagonal block */ 708 diag = ba+k*bs2; 709 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 710 ierr = Kernel_A_gets_inverse_A(bs,diag,pivots,work);CHKERRQ(ierr); 711 712 jmin = bi[k]; jmax = bi[k+1]; 713 if (jmin < jmax) { 714 for (j=jmin; j<jmax; j++){ 715 vj = bj[j]; /* block col. index of U */ 716 u = ba + j*bs2; 717 rtmp_ptr = rtmp + vj*bs2; 718 for (k1=0; k1<bs2; k1++){ 719 *u++ = *rtmp_ptr; 720 *rtmp_ptr++ = 0.0; 721 } 722 } 723 724 /* ... add k to row list for first nonzero entry in k-th row */ 725 il[k] = jmin; 726 i = bj[jmin]; 727 jl[k] = jl[i]; jl[i] = k; 728 } 729 } 730 731 ierr = PetscFree(rtmp);CHKERRQ(ierr); 732 ierr = PetscFree2(il,jl);CHKERRQ(ierr); 733 ierr = PetscFree3(dk,uik,work);CHKERRQ(ierr); 734 ierr = PetscFree(pivots);CHKERRQ(ierr); 735 if (a->permute){ 736 ierr = PetscFree(aa);CHKERRQ(ierr); 737 } 738 739 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 740 C->ops->solve = MatSolve_SeqSBAIJ_N_inplace; 741 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace; 742 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_inplace; 743 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_inplace; 744 745 C->assembled = PETSC_TRUE; 746 C->preallocated = PETSC_TRUE; 747 ierr = PetscLogFlops(1.3333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 753 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info) 754 { 755 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 756 PetscErrorCode ierr; 757 PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 758 PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 759 PetscInt bs=A->rmap->bs,bs2 = a->bs2,bslog; 760 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 761 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 762 MatScalar *work; 763 PetscInt *pivots; 764 765 PetscFunctionBegin; 766 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 767 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 768 ierr = PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 769 for (i=0; i<mbs; i++) { 770 jl[i] = mbs; il[0] = 0; 771 } 772 ierr = PetscMalloc3(bs2,MatScalar,&dk,bs2,MatScalar,&uik,bs,MatScalar,&work);CHKERRQ(ierr); 773 ierr = PetscMalloc(bs*sizeof(PetscInt),&pivots);CHKERRQ(ierr); 774 775 ai = a->i; aj = a->j; aa = a->a; 776 777 /* flops in while loop */ 778 bslog = 2*bs*bs2; 779 780 /* for each row k */ 781 for (k = 0; k<mbs; k++){ 782 783 /*initialize k-th row with elements nonzero in row k of A */ 784 jmin = ai[k]; jmax = ai[k+1]; 785 ap = aa + jmin*bs2; 786 for (j = jmin; j < jmax; j++){ 787 vj = aj[j]; /* block col. index */ 788 rtmp_ptr = rtmp + vj*bs2; 789 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 790 } 791 792 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 793 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 794 i = jl[k]; /* first row to be added to k_th row */ 795 796 while (i < k){ 797 nexti = jl[i]; /* next row to be added to k_th row */ 798 799 /* compute multiplier */ 800 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 801 802 /* uik = -inv(Di)*U_bar(i,k) */ 803 diag = ba + i*bs2; 804 u = ba + ili*bs2; 805 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 806 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 807 808 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 809 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 810 ierr = PetscLogFlops(bslog*2.0);CHKERRQ(ierr); 811 812 /* update -U(i,k) */ 813 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 814 815 /* add multiple of row i to k-th row ... */ 816 jmin = ili + 1; jmax = bi[i+1]; 817 if (jmin < jmax){ 818 for (j=jmin; j<jmax; j++) { 819 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 820 rtmp_ptr = rtmp + bj[j]*bs2; 821 u = ba + j*bs2; 822 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 823 } 824 ierr = PetscLogFlops(bslog*(jmax-jmin));CHKERRQ(ierr); 825 826 /* ... add i to row list for next nonzero entry */ 827 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 828 j = bj[jmin]; 829 jl[i] = jl[j]; jl[j] = i; /* update jl */ 830 } 831 i = nexti; 832 } 833 834 /* save nonzero entries in k-th row of U ... */ 835 836 /* invert diagonal block */ 837 diag = ba+k*bs2; 838 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 839 ierr = Kernel_A_gets_inverse_A(bs,diag,pivots,work);CHKERRQ(ierr); 840 841 jmin = bi[k]; jmax = bi[k+1]; 842 if (jmin < jmax) { 843 for (j=jmin; j<jmax; j++){ 844 vj = bj[j]; /* block col. index of U */ 845 u = ba + j*bs2; 846 rtmp_ptr = rtmp + vj*bs2; 847 for (k1=0; k1<bs2; k1++){ 848 *u++ = *rtmp_ptr; 849 *rtmp_ptr++ = 0.0; 850 } 851 } 852 853 /* ... add k to row list for first nonzero entry in k-th row */ 854 il[k] = jmin; 855 i = bj[jmin]; 856 jl[k] = jl[i]; jl[i] = k; 857 } 858 } 859 860 ierr = PetscFree(rtmp);CHKERRQ(ierr); 861 ierr = PetscFree2(il,jl);CHKERRQ(ierr); 862 ierr = PetscFree3(dk,uik,work);CHKERRQ(ierr); 863 ierr = PetscFree(pivots);CHKERRQ(ierr); 864 865 C->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 866 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 867 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 868 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 869 C->assembled = PETSC_TRUE; 870 C->preallocated = PETSC_TRUE; 871 ierr = PetscLogFlops(1.3333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 872 PetscFunctionReturn(0); 873 } 874 875 /* 876 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 877 Version for blocks 2 by 2. 878 */ 879 #undef __FUNCT__ 880 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 881 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C,Mat A,const MatFactorInfo *info) 882 { 883 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 884 IS perm = b->row; 885 PetscErrorCode ierr; 886 const PetscInt *ai,*aj,*perm_ptr; 887 PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 888 PetscInt *a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 889 MatScalar *ba = b->a,*aa,*ap; 890 MatScalar *u,*diag,*rtmp,*rtmp_ptr,dk[4],uik[4]; 891 PetscReal shift = info->shiftinblocks; 892 893 PetscFunctionBegin; 894 /* initialization */ 895 /* il and jl record the first nonzero element in each row of the accessing 896 window U(0:k, k:mbs-1). 897 jl: list of rows to be added to uneliminated rows 898 i>= k: jl(i) is the first row to be added to row i 899 i< k: jl(i) is the row following row i in some list of rows 900 jl(i) = mbs indicates the end of a list 901 il(i): points to the first nonzero element in columns k,...,mbs-1 of 902 row i of U */ 903 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 904 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 905 ierr = PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 906 for (i=0; i<mbs; i++) { 907 jl[i] = mbs; il[0] = 0; 908 } 909 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 910 911 /* check permutation */ 912 if (!a->permute){ 913 ai = a->i; aj = a->j; aa = a->a; 914 } else { 915 ai = a->inew; aj = a->jnew; 916 ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 917 ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 918 ierr = PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);CHKERRQ(ierr); 919 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr); 920 921 for (i=0; i<mbs; i++){ 922 jmin = ai[i]; jmax = ai[i+1]; 923 for (j=jmin; j<jmax; j++){ 924 while (a2anew[j] != j){ 925 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 926 for (k1=0; k1<4; k1++){ 927 dk[k1] = aa[k*4+k1]; 928 aa[k*4+k1] = aa[j*4+k1]; 929 aa[j*4+k1] = dk[k1]; 930 } 931 } 932 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 933 if (i > aj[j]){ 934 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 935 ap = aa + j*4; /* ptr to the beginning of the block */ 936 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 937 ap[1] = ap[2]; 938 ap[2] = dk[1]; 939 } 940 } 941 } 942 ierr = PetscFree(a2anew);CHKERRQ(ierr); 943 } 944 945 /* for each row k */ 946 for (k = 0; k<mbs; k++){ 947 948 /*initialize k-th row with elements nonzero in row perm(k) of A */ 949 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 950 ap = aa + jmin*4; 951 for (j = jmin; j < jmax; j++){ 952 vj = perm_ptr[aj[j]]; /* block col. index */ 953 rtmp_ptr = rtmp + vj*4; 954 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 955 } 956 957 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 958 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 959 i = jl[k]; /* first row to be added to k_th row */ 960 961 while (i < k){ 962 nexti = jl[i]; /* next row to be added to k_th row */ 963 964 /* compute multiplier */ 965 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 966 967 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 968 diag = ba + i*4; 969 u = ba + ili*4; 970 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 971 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 972 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 973 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 974 975 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 976 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 977 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 978 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 979 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 980 981 ierr = PetscLogFlops(16.0*2.0);CHKERRQ(ierr); 982 983 /* update -U(i,k): ba[ili] = uik */ 984 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 985 986 /* add multiple of row i to k-th row ... */ 987 jmin = ili + 1; jmax = bi[i+1]; 988 if (jmin < jmax){ 989 for (j=jmin; j<jmax; j++) { 990 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 991 rtmp_ptr = rtmp + bj[j]*4; 992 u = ba + j*4; 993 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 994 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 995 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 996 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 997 } 998 ierr = PetscLogFlops(16.0*(jmax-jmin));CHKERRQ(ierr); 999 1000 /* ... add i to row list for next nonzero entry */ 1001 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1002 j = bj[jmin]; 1003 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1004 } 1005 i = nexti; 1006 } 1007 1008 /* save nonzero entries in k-th row of U ... */ 1009 1010 /* invert diagonal block */ 1011 diag = ba+k*4; 1012 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 1013 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 1014 1015 jmin = bi[k]; jmax = bi[k+1]; 1016 if (jmin < jmax) { 1017 for (j=jmin; j<jmax; j++){ 1018 vj = bj[j]; /* block col. index of U */ 1019 u = ba + j*4; 1020 rtmp_ptr = rtmp + vj*4; 1021 for (k1=0; k1<4; k1++){ 1022 *u++ = *rtmp_ptr; 1023 *rtmp_ptr++ = 0.0; 1024 } 1025 } 1026 1027 /* ... add k to row list for first nonzero entry in k-th row */ 1028 il[k] = jmin; 1029 i = bj[jmin]; 1030 jl[k] = jl[i]; jl[i] = k; 1031 } 1032 } 1033 1034 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1035 ierr = PetscFree2(il,jl);CHKERRQ(ierr); 1036 if (a->permute) { 1037 ierr = PetscFree(aa);CHKERRQ(ierr); 1038 } 1039 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 1040 C->ops->solve = MatSolve_SeqSBAIJ_2_inplace; 1041 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace; 1042 C->assembled = PETSC_TRUE; 1043 C->preallocated = PETSC_TRUE; 1044 ierr = PetscLogFlops(1.3333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 1045 PetscFunctionReturn(0); 1046 } 1047 1048 /* 1049 Version for when blocks are 2 by 2 Using natural ordering 1050 */ 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 1053 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info) 1054 { 1055 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 1056 PetscErrorCode ierr; 1057 PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 1058 PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 1059 MatScalar *ba = b->a,*aa,*ap,dk[8],uik[8]; 1060 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 1061 PetscReal shift = info->shiftinblocks; 1062 1063 PetscFunctionBegin; 1064 /* initialization */ 1065 /* il and jl record the first nonzero element in each row of the accessing 1066 window U(0:k, k:mbs-1). 1067 jl: list of rows to be added to uneliminated rows 1068 i>= k: jl(i) is the first row to be added to row i 1069 i< k: jl(i) is the row following row i in some list of rows 1070 jl(i) = mbs indicates the end of a list 1071 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1072 row i of U */ 1073 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1074 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1075 ierr = PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 1076 for (i=0; i<mbs; i++) { 1077 jl[i] = mbs; il[0] = 0; 1078 } 1079 ai = a->i; aj = a->j; aa = a->a; 1080 1081 /* for each row k */ 1082 for (k = 0; k<mbs; k++){ 1083 1084 /*initialize k-th row with elements nonzero in row k of A */ 1085 jmin = ai[k]; jmax = ai[k+1]; 1086 ap = aa + jmin*4; 1087 for (j = jmin; j < jmax; j++){ 1088 vj = aj[j]; /* block col. index */ 1089 rtmp_ptr = rtmp + vj*4; 1090 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 1091 } 1092 1093 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1094 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 1095 i = jl[k]; /* first row to be added to k_th row */ 1096 1097 while (i < k){ 1098 nexti = jl[i]; /* next row to be added to k_th row */ 1099 1100 /* compute multiplier */ 1101 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1102 1103 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 1104 diag = ba + i*4; 1105 u = ba + ili*4; 1106 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 1107 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 1108 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 1109 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 1110 1111 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 1112 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 1113 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 1114 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 1115 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 1116 1117 ierr = PetscLogFlops(16.0*2.0);CHKERRQ(ierr); 1118 1119 /* update -U(i,k): ba[ili] = uik */ 1120 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 1121 1122 /* add multiple of row i to k-th row ... */ 1123 jmin = ili + 1; jmax = bi[i+1]; 1124 if (jmin < jmax){ 1125 for (j=jmin; j<jmax; j++) { 1126 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 1127 rtmp_ptr = rtmp + bj[j]*4; 1128 u = ba + j*4; 1129 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 1130 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 1131 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 1132 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 1133 } 1134 ierr = PetscLogFlops(16.0*(jmax-jmin));CHKERRQ(ierr); 1135 1136 /* ... add i to row list for next nonzero entry */ 1137 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1138 j = bj[jmin]; 1139 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1140 } 1141 i = nexti; 1142 } 1143 1144 /* save nonzero entries in k-th row of U ... */ 1145 1146 /* invert diagonal block */ 1147 diag = ba+k*4; 1148 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 1149 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 1150 1151 jmin = bi[k]; jmax = bi[k+1]; 1152 if (jmin < jmax) { 1153 for (j=jmin; j<jmax; j++){ 1154 vj = bj[j]; /* block col. index of U */ 1155 u = ba + j*4; 1156 rtmp_ptr = rtmp + vj*4; 1157 for (k1=0; k1<4; k1++){ 1158 *u++ = *rtmp_ptr; 1159 *rtmp_ptr++ = 0.0; 1160 } 1161 } 1162 1163 /* ... add k to row list for first nonzero entry in k-th row */ 1164 il[k] = jmin; 1165 i = bj[jmin]; 1166 jl[k] = jl[i]; jl[i] = k; 1167 } 1168 } 1169 1170 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1171 ierr = PetscFree2(il,jl);CHKERRQ(ierr); 1172 1173 C->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1174 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1175 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1176 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1177 C->assembled = PETSC_TRUE; 1178 C->preallocated = PETSC_TRUE; 1179 ierr = PetscLogFlops(1.3333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 1180 PetscFunctionReturn(0); 1181 } 1182 1183 /* 1184 Numeric U^T*D*U factorization for SBAIJ format. 1185 Version for blocks are 1 by 1. 1186 */ 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace" 1189 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C,Mat A,const MatFactorInfo *info) 1190 { 1191 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data; 1192 IS ip=b->row; 1193 PetscErrorCode ierr; 1194 const PetscInt *ai,*aj,*rip; 1195 PetscInt *a2anew,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j,*bcol; 1196 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1197 MatScalar *rtmp,*ba=b->a,*bval,*aa,dk,uikdi; 1198 PetscReal zeropivot,rs,shiftnz; 1199 PetscReal shiftpd; 1200 ChShift_Ctx sctx; 1201 PetscInt newshift; 1202 1203 PetscFunctionBegin; 1204 /* initialization */ 1205 shiftnz = info->shiftnz; 1206 shiftpd = info->shiftpd; 1207 zeropivot = info->zeropivot; 1208 1209 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1210 if (!a->permute){ 1211 ai = a->i; aj = a->j; aa = a->a; 1212 } else { 1213 ai = a->inew; aj = a->jnew; 1214 nz = ai[mbs]; 1215 ierr = PetscMalloc(nz*sizeof(MatScalar),&aa);CHKERRQ(ierr); 1216 a2anew = a->a2anew; 1217 bval = a->a; 1218 for (j=0; j<nz; j++){ 1219 aa[a2anew[j]] = *(bval++); 1220 } 1221 } 1222 1223 /* initialization */ 1224 /* il and jl record the first nonzero element in each row of the accessing 1225 window U(0:k, k:mbs-1). 1226 jl: list of rows to be added to uneliminated rows 1227 i>= k: jl(i) is the first row to be added to row i 1228 i< k: jl(i) is the row following row i in some list of rows 1229 jl(i) = mbs indicates the end of a list 1230 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1231 row i of U */ 1232 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 1233 1234 sctx.shift_amount = 0; 1235 sctx.nshift = 0; 1236 do { 1237 sctx.chshift = PETSC_FALSE; 1238 for (i=0; i<mbs; i++) { 1239 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1240 } 1241 1242 for (k = 0; k<mbs; k++){ 1243 /*initialize k-th row by the perm[k]-th row of A */ 1244 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1245 bval = ba + bi[k]; 1246 for (j = jmin; j < jmax; j++){ 1247 col = rip[aj[j]]; 1248 rtmp[col] = aa[j]; 1249 *bval++ = 0.0; /* for in-place factorization */ 1250 } 1251 1252 /* shift the diagonal of the matrix */ 1253 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1254 1255 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1256 dk = rtmp[k]; 1257 i = jl[k]; /* first row to be added to k_th row */ 1258 1259 while (i < k){ 1260 nexti = jl[i]; /* next row to be added to k_th row */ 1261 1262 /* compute multiplier, update diag(k) and U(i,k) */ 1263 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1264 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1265 dk += uikdi*ba[ili]; 1266 ba[ili] = uikdi; /* -U(i,k) */ 1267 1268 /* add multiple of row i to k-th row */ 1269 jmin = ili + 1; jmax = bi[i+1]; 1270 if (jmin < jmax){ 1271 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1272 ierr = PetscLogFlops(2.0*(jmax-jmin));CHKERRQ(ierr); 1273 1274 /* update il and jl for row i */ 1275 il[i] = jmin; 1276 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1277 } 1278 i = nexti; 1279 } 1280 1281 /* shift the diagonals when zero pivot is detected */ 1282 /* compute rs=sum of abs(off-diagonal) */ 1283 rs = 0.0; 1284 jmin = bi[k]+1; 1285 nz = bi[k+1] - jmin; 1286 if (nz){ 1287 bcol = bj + jmin; 1288 while (nz--){ 1289 rs += PetscAbsScalar(rtmp[*bcol]); 1290 bcol++; 1291 } 1292 } 1293 1294 sctx.rs = rs; 1295 sctx.pv = dk; 1296 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1297 if (newshift == 1) break; /* sctx.shift_amount is updated */ 1298 1299 /* copy data into U(k,:) */ 1300 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1301 jmin = bi[k]+1; jmax = bi[k+1]; 1302 if (jmin < jmax) { 1303 for (j=jmin; j<jmax; j++){ 1304 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1305 } 1306 /* add the k-th row into il and jl */ 1307 il[k] = jmin; 1308 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1309 } 1310 } 1311 } while (sctx.chshift); 1312 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 1313 if (a->permute){ierr = PetscFree(aa);CHKERRQ(ierr);} 1314 1315 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1316 C->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 1317 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace; 1318 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 1319 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 1320 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 1321 C->assembled = PETSC_TRUE; 1322 C->preallocated = PETSC_TRUE; 1323 ierr = PetscLogFlops(C->rmap->N);CHKERRQ(ierr); 1324 if (sctx.nshift){ 1325 if (shiftnz) { 1326 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1327 } else if (shiftpd) { 1328 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1329 } 1330 } 1331 PetscFunctionReturn(0); 1332 } 1333 1334 /* 1335 Version for when blocks are 1 by 1 Using natural ordering - modified from MatCholeskyFactorNumeric_SeqAIJ() 1336 */ 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 1339 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info) 1340 { 1341 Mat C = B; 1342 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1343 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1344 PetscErrorCode ierr; 1345 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 1346 PetscInt *ai=a->i,*aj=a->j; 1347 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 1348 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1349 1350 LUShift_Ctx sctx; 1351 PetscInt newshift; 1352 PetscReal rs; 1353 MatScalar d,*v; 1354 1355 PetscFunctionBegin; 1356 /* MatPivotSetUp(): initialize shift context sctx */ 1357 ierr = PetscMemzero(&sctx,sizeof(LUShift_Ctx));CHKERRQ(ierr); 1358 1359 /* if both shift schemes are chosen by user, only use info->shiftpd */ 1360 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 1361 sctx.shift_top = info->zeropivot; 1362 for (i=0; i<mbs; i++) { 1363 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1364 d = (aa)[a->diag[i]]; 1365 rs = -PetscAbsScalar(d) - PetscRealPart(d); 1366 v = aa+ai[i]; 1367 nz = ai[i+1] - ai[i]; 1368 for (j=0; j<nz; j++) 1369 rs += PetscAbsScalar(v[j]); 1370 if (rs>sctx.shift_top) sctx.shift_top = rs; 1371 } 1372 sctx.shift_top *= 1.1; 1373 sctx.nshift_max = 5; 1374 sctx.shift_lo = 0.; 1375 sctx.shift_hi = 1.; 1376 } 1377 1378 /* allocate working arrays 1379 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 1380 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 1381 */ 1382 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr); 1383 1384 do { 1385 sctx.lushift = PETSC_FALSE; 1386 1387 for (i=0; i<mbs; i++) c2r[i] = mbs; 1388 il[0] = 0; 1389 1390 for (k = 0; k<mbs; k++){ 1391 /* zero rtmp */ 1392 nz = bi[k+1] - bi[k]; 1393 bjtmp = bj + bi[k]; 1394 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 1395 1396 /* load in initial unfactored row */ 1397 bval = ba + bi[k]; 1398 jmin = ai[k]; jmax = ai[k+1]; 1399 for (j = jmin; j < jmax; j++){ 1400 col = aj[j]; 1401 rtmp[col] = aa[j]; 1402 *bval++ = 0.0; /* for in-place factorization */ 1403 } 1404 /* shift the diagonal of the matrix: ZeropivotApply() */ 1405 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 1406 1407 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1408 dk = rtmp[k]; 1409 i = c2r[k]; /* first row to be added to k_th row */ 1410 1411 while (i < k){ 1412 nexti = c2r[i]; /* next row to be added to k_th row */ 1413 1414 /* compute multiplier, update diag(k) and U(i,k) */ 1415 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1416 uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 1417 dk += uikdi*ba[ili]; /* update diag[k] */ 1418 ba[ili] = uikdi; /* -U(i,k) */ 1419 1420 /* add multiple of row i to k-th row */ 1421 jmin = ili + 1; jmax = bi[i+1]; 1422 if (jmin < jmax){ 1423 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1424 /* update il and c2r for row i */ 1425 il[i] = jmin; 1426 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 1427 } 1428 i = nexti; 1429 } 1430 1431 /* copy data into U(k,:) */ 1432 rs = 0.0; 1433 jmin = bi[k]; jmax = bi[k+1]-1; 1434 if (jmin < jmax) { 1435 for (j=jmin; j<jmax; j++){ 1436 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 1437 } 1438 /* add the k-th row into il and c2r */ 1439 il[k] = jmin; 1440 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 1441 } 1442 1443 /* MatPivotCheck() */ 1444 sctx.rs = rs; 1445 sctx.pv = dk; 1446 if (info->shiftnz){ 1447 ierr = MatPivotCheck_nz(info,sctx,k,newshift);CHKERRQ(ierr); 1448 } else if (info->shiftpd){ 1449 ierr = MatPivotCheck_pd(info,sctx,k,newshift);CHKERRQ(ierr); 1450 } else if (info->shiftinblocks){ 1451 ierr = MatPivotCheck_inblocks(info,sctx,k,newshift);CHKERRQ(ierr); 1452 } else { 1453 ierr = MatPivotCheck_none(info,sctx,k,newshift);CHKERRQ(ierr); 1454 } 1455 dk = sctx.pv; 1456 if (newshift == 1) break; 1457 1458 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 1459 } 1460 } while (sctx.lushift); 1461 1462 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 1463 1464 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1465 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1466 B->ops->forwardsolve = 0; 1467 B->ops->backwardsolve = 0; 1468 1469 C->assembled = PETSC_TRUE; 1470 C->preallocated = PETSC_TRUE; 1471 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 1472 1473 /* MatPivotView() */ 1474 if (sctx.nshift){ 1475 if (info->shiftpd) { 1476 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 1477 } else if (info->shiftnz) { 1478 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1479 } else if (info->shiftinblocks){ 1480 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftinblocks);CHKERRQ(ierr); 1481 } 1482 } 1483 PetscFunctionReturn(0); 1484 } 1485 1486 #undef __FUNCT__ 1487 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace" 1488 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C,Mat A,const MatFactorInfo *info) 1489 { 1490 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data; 1491 PetscErrorCode ierr; 1492 PetscInt i,j,mbs = a->mbs; 1493 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 1494 PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz; 1495 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 1496 PetscReal zeropivot,rs,shiftnz; 1497 PetscReal shiftpd; 1498 ChShift_Ctx sctx; 1499 PetscInt newshift; 1500 1501 PetscFunctionBegin; 1502 /* initialization */ 1503 shiftnz = info->shiftnz; 1504 shiftpd = info->shiftpd; 1505 zeropivot = info->zeropivot; 1506 1507 /* il and jl record the first nonzero element in each row of the accessing 1508 window U(0:k, k:mbs-1). 1509 jl: list of rows to be added to uneliminated rows 1510 i>= k: jl(i) is the first row to be added to row i 1511 i< k: jl(i) is the row following row i in some list of rows 1512 jl(i) = mbs indicates the end of a list 1513 il(i): points to the first nonzero element in U(i,k:mbs-1) 1514 */ 1515 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1516 ierr = PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 1517 1518 sctx.shift_amount = 0; 1519 sctx.nshift = 0; 1520 do { 1521 sctx.chshift = PETSC_FALSE; 1522 for (i=0; i<mbs; i++) { 1523 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1524 } 1525 1526 for (k = 0; k<mbs; k++){ 1527 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1528 nz = ai[k+1] - ai[k]; 1529 acol = aj + ai[k]; 1530 aval = aa + ai[k]; 1531 bval = ba + bi[k]; 1532 while (nz -- ){ 1533 rtmp[*acol++] = *aval++; 1534 *bval++ = 0.0; /* for in-place factorization */ 1535 } 1536 1537 /* shift the diagonal of the matrix */ 1538 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1539 1540 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1541 dk = rtmp[k]; 1542 i = jl[k]; /* first row to be added to k_th row */ 1543 1544 while (i < k){ 1545 nexti = jl[i]; /* next row to be added to k_th row */ 1546 /* compute multiplier, update D(k) and U(i,k) */ 1547 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1548 uikdi = - ba[ili]*ba[bi[i]]; 1549 dk += uikdi*ba[ili]; 1550 ba[ili] = uikdi; /* -U(i,k) */ 1551 1552 /* add multiple of row i to k-th row ... */ 1553 jmin = ili + 1; 1554 nz = bi[i+1] - jmin; 1555 if (nz > 0){ 1556 bcol = bj + jmin; 1557 bval = ba + jmin; 1558 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 1559 while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 1560 1561 /* update il and jl for i-th row */ 1562 il[i] = jmin; 1563 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1564 } 1565 i = nexti; 1566 } 1567 1568 /* shift the diagonals when zero pivot is detected */ 1569 /* compute rs=sum of abs(off-diagonal) */ 1570 rs = 0.0; 1571 jmin = bi[k]+1; 1572 nz = bi[k+1] - jmin; 1573 if (nz){ 1574 bcol = bj + jmin; 1575 while (nz--){ 1576 rs += PetscAbsScalar(rtmp[*bcol]); 1577 bcol++; 1578 } 1579 } 1580 1581 sctx.rs = rs; 1582 sctx.pv = dk; 1583 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1584 if (newshift == 1) break; /* sctx.shift_amount is updated */ 1585 1586 /* copy data into U(k,:) */ 1587 ba[bi[k]] = 1.0/dk; 1588 jmin = bi[k]+1; 1589 nz = bi[k+1] - jmin; 1590 if (nz){ 1591 bcol = bj + jmin; 1592 bval = ba + jmin; 1593 while (nz--){ 1594 *bval++ = rtmp[*bcol]; 1595 rtmp[*bcol++] = 0.0; 1596 } 1597 /* add k-th row into il and jl */ 1598 il[k] = jmin; 1599 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1600 } 1601 } /* end of for (k = 0; k<mbs; k++) */ 1602 } while (sctx.chshift); 1603 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1604 ierr = PetscFree2(il,jl);CHKERRQ(ierr); 1605 1606 C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1607 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace; 1608 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1609 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1610 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1611 1612 C->assembled = PETSC_TRUE; 1613 C->preallocated = PETSC_TRUE; 1614 ierr = PetscLogFlops(C->rmap->N);CHKERRQ(ierr); 1615 if (sctx.nshift){ 1616 if (shiftnz) { 1617 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1618 } else if (shiftpd) { 1619 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1620 } 1621 } 1622 PetscFunctionReturn(0); 1623 } 1624 1625 #undef __FUNCT__ 1626 #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ" 1627 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,const MatFactorInfo *info) 1628 { 1629 PetscErrorCode ierr; 1630 Mat C; 1631 1632 PetscFunctionBegin; 1633 ierr = MatGetFactor(A,"petsc",MAT_FACTOR_CHOLESKY,&C);CHKERRQ(ierr); 1634 ierr = MatCholeskyFactorSymbolic(C,A,perm,info);CHKERRQ(ierr); 1635 ierr = MatCholeskyFactorNumeric(C,A,info);CHKERRQ(ierr); 1636 A->ops->solve = C->ops->solve; 1637 A->ops->solvetranspose = C->ops->solvetranspose; 1638 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1639 PetscFunctionReturn(0); 1640 } 1641 1642 1643