1 2 #include <../src/mat/impls/baij/seq/baij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <../src/mat/blockinvert.h> 5 #include <petscis.h> 6 7 /* 8 input: 9 F -- numeric factor 10 output: 11 nneg, nzero, npos: matrix inertia 12 */ 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatGetInertia_SeqSBAIJ" 16 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,PetscInt *nneig,PetscInt *nzero,PetscInt *npos) 17 { 18 Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data; 19 MatScalar *dd = fact_ptr->a; 20 PetscInt mbs=fact_ptr->mbs,bs=F->rmap->bs,i,nneig_tmp,npos_tmp,*fi = fact_ptr->diag; 21 22 PetscFunctionBegin; 23 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for bs: %D >1 yet",bs); 24 nneig_tmp = 0; npos_tmp = 0; 25 for (i=0; i<mbs; i++){ 26 if (PetscRealPart(dd[*fi]) > 0.0){ 27 npos_tmp++; 28 } else if (PetscRealPart(dd[*fi]) < 0.0){ 29 nneig_tmp++; 30 } 31 fi++; 32 } 33 if (nneig) *nneig = nneig_tmp; 34 if (npos) *npos = npos_tmp; 35 if (nzero) *nzero = mbs - nneig_tmp - npos_tmp; 36 37 PetscFunctionReturn(0); 38 } 39 40 /* 41 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 42 Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad. 43 */ 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ_MSR" 46 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F,Mat A,IS perm,const MatFactorInfo *info) 47 { 48 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 49 PetscErrorCode ierr; 50 const PetscInt *rip,*ai,*aj; 51 PetscInt i,mbs = a->mbs,*jutmp,bs = A->rmap->bs,bs2=a->bs2; 52 PetscInt m,reallocs = 0,prow; 53 PetscInt *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 54 PetscReal f = info->fill; 55 PetscBool perm_identity; 56 57 PetscFunctionBegin; 58 /* check whether perm is the identity mapping */ 59 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 60 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 61 62 if (perm_identity){ /* without permutation */ 63 a->permute = PETSC_FALSE; 64 ai = a->i; aj = a->j; 65 } else { /* non-trivial permutation */ 66 a->permute = PETSC_TRUE; 67 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 68 ai = a->inew; aj = a->jnew; 69 } 70 71 /* initialization */ 72 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 73 umax = (PetscInt)(f*ai[mbs] + 1); umax += mbs + 1; 74 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 75 iu[0] = mbs+1; 76 juidx = mbs + 1; /* index for ju */ 77 /* jl linked list for pivot row -- linked list for col index */ 78 ierr = PetscMalloc2(mbs,PetscInt,&jl,mbs,PetscInt,&q);CHKERRQ(ierr); 79 for (i=0; i<mbs; i++){ 80 jl[i] = mbs; 81 q[i] = 0; 82 } 83 84 /* for each row k */ 85 for (k=0; k<mbs; k++){ 86 for (i=0; i<mbs; i++) q[i] = 0; /* to be removed! */ 87 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 88 q[k] = mbs; 89 /* initialize nonzero structure of k-th row to row rip[k] of A */ 90 jmin = ai[rip[k]] +1; /* exclude diag[k] */ 91 jmax = ai[rip[k]+1]; 92 for (j=jmin; j<jmax; j++){ 93 vj = rip[aj[j]]; /* col. value */ 94 if(vj > k){ 95 qm = k; 96 do { 97 m = qm; qm = q[m]; 98 } while(qm < vj); 99 if (qm == vj) { 100 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Duplicate entry in A\n"); 101 } 102 nzk++; 103 q[m] = vj; 104 q[vj] = qm; 105 } /* if(vj > k) */ 106 } /* for (j=jmin; j<jmax; j++) */ 107 108 /* modify nonzero structure of k-th row by computing fill-in 109 for each row i to be merged in */ 110 prow = k; 111 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 112 113 while (prow < k){ 114 /* merge row prow into k-th row */ 115 jmin = iu[prow] + 1; jmax = iu[prow+1]; 116 qm = k; 117 for (j=jmin; j<jmax; j++){ 118 vj = ju[j]; 119 do { 120 m = qm; qm = q[m]; 121 } while (qm < vj); 122 if (qm != vj){ 123 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 124 } 125 } 126 prow = jl[prow]; /* next pivot row */ 127 } 128 129 /* add k to row list for first nonzero element in k-th row */ 130 if (nzk > 0){ 131 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 132 jl[k] = jl[i]; jl[i] = k; 133 } 134 iu[k+1] = iu[k] + nzk; 135 136 /* allocate more space to ju if needed */ 137 if (iu[k+1] > umax) { 138 /* estimate how much additional space we will need */ 139 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 140 /* just double the memory each time */ 141 maxadd = umax; 142 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 143 umax += maxadd; 144 145 /* allocate a longer ju */ 146 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 147 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 148 ierr = PetscFree(ju);CHKERRQ(ierr); 149 ju = jutmp; 150 reallocs++; /* count how many times we realloc */ 151 } 152 153 /* save nonzero structure of k-th row in ju */ 154 i=k; 155 while (nzk --) { 156 i = q[i]; 157 ju[juidx++] = i; 158 } 159 } 160 161 #if defined(PETSC_USE_INFO) 162 if (ai[mbs] != 0) { 163 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 164 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 165 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 166 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 167 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 168 } else { 169 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 170 } 171 #endif 172 173 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 174 ierr = PetscFree2(jl,q);CHKERRQ(ierr); 175 176 /* put together the new matrix */ 177 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(F,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 178 179 /* ierr = PetscLogObjectParent(B,iperm);CHKERRQ(ierr); */ 180 b = (Mat_SeqSBAIJ*)(F)->data; 181 b->singlemalloc = PETSC_FALSE; 182 b->free_a = PETSC_TRUE; 183 b->free_ij = PETSC_TRUE; 184 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 185 b->j = ju; 186 b->i = iu; 187 b->diag = 0; 188 b->ilen = 0; 189 b->imax = 0; 190 b->row = perm; 191 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 192 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 193 b->icol = perm; 194 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 195 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 196 /* In b structure: Free imax, ilen, old a, old j. 197 Allocate idnew, solve_work, new a, new j */ 198 ierr = PetscLogObjectMemory(F,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 199 b->maxnz = b->nz = iu[mbs]; 200 201 (F)->info.factor_mallocs = reallocs; 202 (F)->info.fill_ratio_given = f; 203 if (ai[mbs] != 0) { 204 (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 205 } else { 206 (F)->info.fill_ratio_needed = 0.0; 207 } 208 ierr = MatSeqSBAIJSetNumericFactorization_inplace(F,perm_identity);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 /* 212 Symbolic U^T*D*U factorization for SBAIJ format. 213 See MatICCFactorSymbolic_SeqAIJ() for description of its data structure. 214 */ 215 #include <petscbt.h> 216 #include <../src/mat/utils/freespace.h> 217 #undef __FUNCT__ 218 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 219 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 220 { 221 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 222 Mat_SeqSBAIJ *b; 223 PetscErrorCode ierr; 224 PetscBool perm_identity,missing; 225 PetscReal fill = info->fill; 226 const PetscInt *rip,*ai=a->i,*aj=a->j; 227 PetscInt i,mbs=a->mbs,bs=A->rmap->bs,reallocs=0,prow,d; 228 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 229 PetscInt nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 230 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 231 PetscBT lnkbt; 232 233 PetscFunctionBegin; 234 if (bs > 1){ 235 ierr = MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact,A,perm,info);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 239 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 240 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 241 242 /* check whether perm is the identity mapping */ 243 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 244 if (!perm_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format"); 245 a->permute = PETSC_FALSE; 246 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 247 248 /* initialization */ 249 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 250 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 251 ui[0] = 0; 252 253 /* jl: linked list for storing indices of the pivot rows 254 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 255 ierr = PetscMalloc4(mbs,PetscInt*,&ui_ptr,mbs,PetscInt,&il,mbs,PetscInt,&jl,mbs,PetscInt,&cols);CHKERRQ(ierr); 256 for (i=0; i<mbs; i++){ 257 jl[i] = mbs; il[i] = 0; 258 } 259 260 /* create and initialize a linked list for storing column indices of the active row k */ 261 nlnk = mbs + 1; 262 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 263 264 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 265 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 266 current_space = free_space; 267 268 for (k=0; k<mbs; k++){ /* for each active row k */ 269 /* initialize lnk by the column indices of row rip[k] of A */ 270 nzk = 0; 271 ncols = ai[k+1] - ai[k]; 272 if (!ncols) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row %D in matrix ",k); 273 for (j=0; j<ncols; j++){ 274 i = *(aj + ai[k] + j); 275 cols[j] = i; 276 } 277 ierr = PetscLLAdd(ncols,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 278 nzk += nlnk; 279 280 /* update lnk by computing fill-in for each pivot row to be merged in */ 281 prow = jl[k]; /* 1st pivot row */ 282 283 while (prow < k){ 284 nextprow = jl[prow]; 285 /* merge prow into k-th row */ 286 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 287 jmax = ui[prow+1]; 288 ncols = jmax-jmin; 289 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 290 ierr = PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 291 nzk += nlnk; 292 293 /* update il and jl for prow */ 294 if (jmin < jmax){ 295 il[prow] = jmin; 296 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 297 } 298 prow = nextprow; 299 } 300 301 /* if free space is not available, make more free space */ 302 if (current_space->local_remaining<nzk) { 303 i = mbs - k + 1; /* num of unfactored rows */ 304 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 305 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 306 reallocs++; 307 } 308 309 /* copy data into free space, then initialize lnk */ 310 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 311 312 /* add the k-th row into il and jl */ 313 if (nzk > 1){ 314 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 315 jl[k] = jl[i]; jl[i] = k; 316 il[k] = ui[k] + 1; 317 } 318 ui_ptr[k] = current_space->array; 319 current_space->array += nzk; 320 current_space->local_used += nzk; 321 current_space->local_remaining -= nzk; 322 323 ui[k+1] = ui[k] + nzk; 324 } 325 326 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 327 ierr = PetscFree4(ui_ptr,il,jl,cols);CHKERRQ(ierr); 328 329 /* destroy list of free space and other temporary array(s) */ 330 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 331 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,mbs,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 332 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 333 334 /* put together the new matrix in MATSEQSBAIJ format */ 335 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 336 337 b = (Mat_SeqSBAIJ*)fact->data; 338 b->singlemalloc = PETSC_FALSE; 339 b->free_a = PETSC_TRUE; 340 b->free_ij = PETSC_TRUE; 341 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 342 b->j = uj; 343 b->i = ui; 344 b->diag = udiag; 345 b->free_diag = PETSC_TRUE; 346 b->ilen = 0; 347 b->imax = 0; 348 b->row = perm; 349 b->icol = perm; 350 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 351 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 352 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 353 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 354 ierr = PetscLogObjectMemory(fact,ui[mbs]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 355 b->maxnz = b->nz = ui[mbs]; 356 357 fact->info.factor_mallocs = reallocs; 358 fact->info.fill_ratio_given = fill; 359 if (ai[mbs] != 0) { 360 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs])/ai[mbs]; 361 } else { 362 fact->info.fill_ratio_needed = 0.0; 363 } 364 #if defined(PETSC_USE_INFO) 365 if (ai[mbs] != 0) { 366 PetscReal af = fact->info.fill_ratio_needed; 367 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 368 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 369 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 370 } else { 371 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 372 } 373 #endif 374 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 375 PetscFunctionReturn(0); 376 } 377 378 #undef __FUNCT__ 379 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ_inplace" 380 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 381 { 382 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 383 Mat_SeqSBAIJ *b; 384 PetscErrorCode ierr; 385 PetscBool perm_identity,missing; 386 PetscReal fill = info->fill; 387 const PetscInt *rip,*ai,*aj; 388 PetscInt i,mbs=a->mbs,bs=A->rmap->bs,reallocs=0,prow,d; 389 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 390 PetscInt nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr; 391 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 392 PetscBT lnkbt; 393 394 PetscFunctionBegin; 395 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 396 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 397 398 /* 399 This code originally uses Modified Sparse Row (MSR) storage 400 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise! 401 Then it is rewritten so the factor B takes seqsbaij format. However the associated 402 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity, 403 thus the original code in MSR format is still used for these cases. 404 The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever 405 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 406 */ 407 if (bs > 1){ 408 ierr = MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info);CHKERRQ(ierr); 409 PetscFunctionReturn(0); 410 } 411 412 /* check whether perm is the identity mapping */ 413 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 414 415 if (perm_identity){ 416 a->permute = PETSC_FALSE; 417 ai = a->i; aj = a->j; 418 } else { 419 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format"); 420 /* There are bugs for reordeing. Needs further work. 421 MatReordering for sbaij cannot be efficient. User should use aij formt! */ 422 a->permute = PETSC_TRUE; 423 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 424 ai = a->inew; aj = a->jnew; 425 } 426 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 427 428 /* initialization */ 429 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 430 ui[0] = 0; 431 432 /* jl: linked list for storing indices of the pivot rows 433 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 434 ierr = PetscMalloc4(mbs,PetscInt*,&ui_ptr,mbs,PetscInt,&il,mbs,PetscInt,&jl,mbs,PetscInt,&cols);CHKERRQ(ierr); 435 for (i=0; i<mbs; i++){ 436 jl[i] = mbs; il[i] = 0; 437 } 438 439 /* create and initialize a linked list for storing column indices of the active row k */ 440 nlnk = mbs + 1; 441 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 442 443 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 444 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 445 current_space = free_space; 446 447 for (k=0; k<mbs; k++){ /* for each active row k */ 448 /* initialize lnk by the column indices of row rip[k] of A */ 449 nzk = 0; 450 ncols = ai[rip[k]+1] - ai[rip[k]]; 451 for (j=0; j<ncols; j++){ 452 i = *(aj + ai[rip[k]] + j); 453 cols[j] = rip[i]; 454 } 455 ierr = PetscLLAdd(ncols,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 456 nzk += nlnk; 457 458 /* update lnk by computing fill-in for each pivot row to be merged in */ 459 prow = jl[k]; /* 1st pivot row */ 460 461 while (prow < k){ 462 nextprow = jl[prow]; 463 /* merge prow into k-th row */ 464 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 465 jmax = ui[prow+1]; 466 ncols = jmax-jmin; 467 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 468 ierr = PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 469 nzk += nlnk; 470 471 /* update il and jl for prow */ 472 if (jmin < jmax){ 473 il[prow] = jmin; 474 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 475 } 476 prow = nextprow; 477 } 478 479 /* if free space is not available, make more free space */ 480 if (current_space->local_remaining<nzk) { 481 i = mbs - k + 1; /* num of unfactored rows */ 482 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 483 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 484 reallocs++; 485 } 486 487 /* copy data into free space, then initialize lnk */ 488 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 489 490 /* add the k-th row into il and jl */ 491 if (nzk-1 > 0){ 492 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 493 jl[k] = jl[i]; jl[i] = k; 494 il[k] = ui[k] + 1; 495 } 496 ui_ptr[k] = current_space->array; 497 current_space->array += nzk; 498 current_space->local_used += nzk; 499 current_space->local_remaining -= nzk; 500 501 ui[k+1] = ui[k] + nzk; 502 } 503 504 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 505 ierr = PetscFree4(ui_ptr,il,jl,cols);CHKERRQ(ierr); 506 507 /* destroy list of free space and other temporary array(s) */ 508 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 509 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 510 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 511 512 /* put together the new matrix in MATSEQSBAIJ format */ 513 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 514 515 b = (Mat_SeqSBAIJ*)fact->data; 516 b->singlemalloc = PETSC_FALSE; 517 b->free_a = PETSC_TRUE; 518 b->free_ij = PETSC_TRUE; 519 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 520 b->j = uj; 521 b->i = ui; 522 b->diag = 0; 523 b->ilen = 0; 524 b->imax = 0; 525 b->row = perm; 526 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 527 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 528 b->icol = perm; 529 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 530 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 531 ierr = PetscLogObjectMemory(fact,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 532 b->maxnz = b->nz = ui[mbs]; 533 534 fact->info.factor_mallocs = reallocs; 535 fact->info.fill_ratio_given = fill; 536 if (ai[mbs] != 0) { 537 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs])/ai[mbs]; 538 } else { 539 fact->info.fill_ratio_needed = 0.0; 540 } 541 #if defined(PETSC_USE_INFO) 542 if (ai[mbs] != 0) { 543 PetscReal af = fact->info.fill_ratio_needed; 544 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 545 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 546 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 547 } else { 548 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 549 } 550 #endif 551 ierr = MatSeqSBAIJSetNumericFactorization_inplace(fact,perm_identity);CHKERRQ(ierr); 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 557 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C,Mat A,const MatFactorInfo *info) 558 { 559 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 560 IS perm = b->row; 561 PetscErrorCode ierr; 562 const PetscInt *ai,*aj,*perm_ptr,mbs=a->mbs,*bi=b->i,*bj=b->j; 563 PetscInt i,j; 564 PetscInt *a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 565 PetscInt bs=A->rmap->bs,bs2 = a->bs2,bslog = 0; 566 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 567 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 568 MatScalar *work; 569 PetscInt *pivots; 570 571 PetscFunctionBegin; 572 /* initialization */ 573 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 574 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 575 ierr = PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 576 for (i=0; i<mbs; i++) { 577 jl[i] = mbs; il[0] = 0; 578 } 579 ierr = PetscMalloc3(bs2,MatScalar,&dk,bs2,MatScalar,&uik,bs,MatScalar,&work);CHKERRQ(ierr); 580 ierr = PetscMalloc(bs*sizeof(PetscInt),&pivots);CHKERRQ(ierr); 581 582 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 583 584 /* check permutation */ 585 if (!a->permute){ 586 ai = a->i; aj = a->j; aa = a->a; 587 } else { 588 ai = a->inew; aj = a->jnew; 589 ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 590 ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 591 ierr = PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);CHKERRQ(ierr); 592 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr); 593 594 /* flops in while loop */ 595 bslog = 2*bs*bs2; 596 597 for (i=0; i<mbs; i++){ 598 jmin = ai[i]; jmax = ai[i+1]; 599 for (j=jmin; j<jmax; j++){ 600 while (a2anew[j] != j){ 601 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 602 for (k1=0; k1<bs2; k1++){ 603 dk[k1] = aa[k*bs2+k1]; 604 aa[k*bs2+k1] = aa[j*bs2+k1]; 605 aa[j*bs2+k1] = dk[k1]; 606 } 607 } 608 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 609 if (i > aj[j]){ 610 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 611 ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 612 for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 613 for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 614 for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 615 } 616 } 617 } 618 } 619 ierr = PetscFree(a2anew);CHKERRQ(ierr); 620 } 621 622 /* for each row k */ 623 for (k = 0; k<mbs; k++){ 624 625 /*initialize k-th row with elements nonzero in row perm(k) of A */ 626 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 627 628 ap = aa + jmin*bs2; 629 for (j = jmin; j < jmax; j++){ 630 vj = perm_ptr[aj[j]]; /* block col. index */ 631 rtmp_ptr = rtmp + vj*bs2; 632 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 633 } 634 635 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 636 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 637 i = jl[k]; /* first row to be added to k_th row */ 638 639 while (i < k){ 640 nexti = jl[i]; /* next row to be added to k_th row */ 641 642 /* compute multiplier */ 643 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 644 645 /* uik = -inv(Di)*U_bar(i,k) */ 646 diag = ba + i*bs2; 647 u = ba + ili*bs2; 648 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 649 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 650 651 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 652 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 653 ierr = PetscLogFlops(bslog*2.0);CHKERRQ(ierr); 654 655 /* update -U(i,k) */ 656 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 657 658 /* add multiple of row i to k-th row ... */ 659 jmin = ili + 1; jmax = bi[i+1]; 660 if (jmin < jmax){ 661 for (j=jmin; j<jmax; j++) { 662 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 663 rtmp_ptr = rtmp + bj[j]*bs2; 664 u = ba + j*bs2; 665 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 666 } 667 ierr = PetscLogFlops(bslog*(jmax-jmin));CHKERRQ(ierr); 668 669 /* ... add i to row list for next nonzero entry */ 670 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 671 j = bj[jmin]; 672 jl[i] = jl[j]; jl[j] = i; /* update jl */ 673 } 674 i = nexti; 675 } 676 677 /* save nonzero entries in k-th row of U ... */ 678 679 /* invert diagonal block */ 680 diag = ba+k*bs2; 681 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 682 ierr = Kernel_A_gets_inverse_A(bs,diag,pivots,work);CHKERRQ(ierr); 683 684 jmin = bi[k]; jmax = bi[k+1]; 685 if (jmin < jmax) { 686 for (j=jmin; j<jmax; j++){ 687 vj = bj[j]; /* block col. index of U */ 688 u = ba + j*bs2; 689 rtmp_ptr = rtmp + vj*bs2; 690 for (k1=0; k1<bs2; k1++){ 691 *u++ = *rtmp_ptr; 692 *rtmp_ptr++ = 0.0; 693 } 694 } 695 696 /* ... add k to row list for first nonzero entry in k-th row */ 697 il[k] = jmin; 698 i = bj[jmin]; 699 jl[k] = jl[i]; jl[i] = k; 700 } 701 } 702 703 ierr = PetscFree(rtmp);CHKERRQ(ierr); 704 ierr = PetscFree2(il,jl);CHKERRQ(ierr); 705 ierr = PetscFree3(dk,uik,work);CHKERRQ(ierr); 706 ierr = PetscFree(pivots);CHKERRQ(ierr); 707 if (a->permute){ 708 ierr = PetscFree(aa);CHKERRQ(ierr); 709 } 710 711 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 712 C->ops->solve = MatSolve_SeqSBAIJ_N_inplace; 713 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace; 714 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_inplace; 715 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_inplace; 716 717 C->assembled = PETSC_TRUE; 718 C->preallocated = PETSC_TRUE; 719 ierr = PetscLogFlops(1.3333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 720 PetscFunctionReturn(0); 721 } 722 723 #undef __FUNCT__ 724 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 725 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info) 726 { 727 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 728 PetscErrorCode ierr; 729 PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 730 PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 731 PetscInt bs=A->rmap->bs,bs2 = a->bs2,bslog; 732 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 733 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 734 MatScalar *work; 735 PetscInt *pivots; 736 737 PetscFunctionBegin; 738 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 739 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 740 ierr = PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 741 for (i=0; i<mbs; i++) { 742 jl[i] = mbs; il[0] = 0; 743 } 744 ierr = PetscMalloc3(bs2,MatScalar,&dk,bs2,MatScalar,&uik,bs,MatScalar,&work);CHKERRQ(ierr); 745 ierr = PetscMalloc(bs*sizeof(PetscInt),&pivots);CHKERRQ(ierr); 746 747 ai = a->i; aj = a->j; aa = a->a; 748 749 /* flops in while loop */ 750 bslog = 2*bs*bs2; 751 752 /* for each row k */ 753 for (k = 0; k<mbs; k++){ 754 755 /*initialize k-th row with elements nonzero in row k of A */ 756 jmin = ai[k]; jmax = ai[k+1]; 757 ap = aa + jmin*bs2; 758 for (j = jmin; j < jmax; j++){ 759 vj = aj[j]; /* block col. index */ 760 rtmp_ptr = rtmp + vj*bs2; 761 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 762 } 763 764 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 765 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 766 i = jl[k]; /* first row to be added to k_th row */ 767 768 while (i < k){ 769 nexti = jl[i]; /* next row to be added to k_th row */ 770 771 /* compute multiplier */ 772 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 773 774 /* uik = -inv(Di)*U_bar(i,k) */ 775 diag = ba + i*bs2; 776 u = ba + ili*bs2; 777 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 778 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 779 780 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 781 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 782 ierr = PetscLogFlops(bslog*2.0);CHKERRQ(ierr); 783 784 /* update -U(i,k) */ 785 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 786 787 /* add multiple of row i to k-th row ... */ 788 jmin = ili + 1; jmax = bi[i+1]; 789 if (jmin < jmax){ 790 for (j=jmin; j<jmax; j++) { 791 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 792 rtmp_ptr = rtmp + bj[j]*bs2; 793 u = ba + j*bs2; 794 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 795 } 796 ierr = PetscLogFlops(bslog*(jmax-jmin));CHKERRQ(ierr); 797 798 /* ... add i to row list for next nonzero entry */ 799 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 800 j = bj[jmin]; 801 jl[i] = jl[j]; jl[j] = i; /* update jl */ 802 } 803 i = nexti; 804 } 805 806 /* save nonzero entries in k-th row of U ... */ 807 808 /* invert diagonal block */ 809 diag = ba+k*bs2; 810 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 811 ierr = Kernel_A_gets_inverse_A(bs,diag,pivots,work);CHKERRQ(ierr); 812 813 jmin = bi[k]; jmax = bi[k+1]; 814 if (jmin < jmax) { 815 for (j=jmin; j<jmax; j++){ 816 vj = bj[j]; /* block col. index of U */ 817 u = ba + j*bs2; 818 rtmp_ptr = rtmp + vj*bs2; 819 for (k1=0; k1<bs2; k1++){ 820 *u++ = *rtmp_ptr; 821 *rtmp_ptr++ = 0.0; 822 } 823 } 824 825 /* ... add k to row list for first nonzero entry in k-th row */ 826 il[k] = jmin; 827 i = bj[jmin]; 828 jl[k] = jl[i]; jl[i] = k; 829 } 830 } 831 832 ierr = PetscFree(rtmp);CHKERRQ(ierr); 833 ierr = PetscFree2(il,jl);CHKERRQ(ierr); 834 ierr = PetscFree3(dk,uik,work);CHKERRQ(ierr); 835 ierr = PetscFree(pivots);CHKERRQ(ierr); 836 837 C->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 838 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 839 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 840 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 841 C->assembled = PETSC_TRUE; 842 C->preallocated = PETSC_TRUE; 843 ierr = PetscLogFlops(1.3333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 844 PetscFunctionReturn(0); 845 } 846 847 /* 848 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 849 Version for blocks 2 by 2. 850 */ 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 853 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C,Mat A,const MatFactorInfo *info) 854 { 855 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 856 IS perm = b->row; 857 PetscErrorCode ierr; 858 const PetscInt *ai,*aj,*perm_ptr; 859 PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 860 PetscInt *a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 861 MatScalar *ba = b->a,*aa,*ap; 862 MatScalar *u,*diag,*rtmp,*rtmp_ptr,dk[4],uik[4]; 863 PetscReal shift = info->shiftamount; 864 865 PetscFunctionBegin; 866 /* initialization */ 867 /* il and jl record the first nonzero element in each row of the accessing 868 window U(0:k, k:mbs-1). 869 jl: list of rows to be added to uneliminated rows 870 i>= k: jl(i) is the first row to be added to row i 871 i< k: jl(i) is the row following row i in some list of rows 872 jl(i) = mbs indicates the end of a list 873 il(i): points to the first nonzero element in columns k,...,mbs-1 of 874 row i of U */ 875 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 876 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 877 ierr = PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 878 for (i=0; i<mbs; i++) { 879 jl[i] = mbs; il[0] = 0; 880 } 881 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 882 883 /* check permutation */ 884 if (!a->permute){ 885 ai = a->i; aj = a->j; aa = a->a; 886 } else { 887 ai = a->inew; aj = a->jnew; 888 ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 889 ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 890 ierr = PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);CHKERRQ(ierr); 891 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr); 892 893 for (i=0; i<mbs; i++){ 894 jmin = ai[i]; jmax = ai[i+1]; 895 for (j=jmin; j<jmax; j++){ 896 while (a2anew[j] != j){ 897 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 898 for (k1=0; k1<4; k1++){ 899 dk[k1] = aa[k*4+k1]; 900 aa[k*4+k1] = aa[j*4+k1]; 901 aa[j*4+k1] = dk[k1]; 902 } 903 } 904 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 905 if (i > aj[j]){ 906 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 907 ap = aa + j*4; /* ptr to the beginning of the block */ 908 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 909 ap[1] = ap[2]; 910 ap[2] = dk[1]; 911 } 912 } 913 } 914 ierr = PetscFree(a2anew);CHKERRQ(ierr); 915 } 916 917 /* for each row k */ 918 for (k = 0; k<mbs; k++){ 919 920 /*initialize k-th row with elements nonzero in row perm(k) of A */ 921 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 922 ap = aa + jmin*4; 923 for (j = jmin; j < jmax; j++){ 924 vj = perm_ptr[aj[j]]; /* block col. index */ 925 rtmp_ptr = rtmp + vj*4; 926 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 927 } 928 929 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 930 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 931 i = jl[k]; /* first row to be added to k_th row */ 932 933 while (i < k){ 934 nexti = jl[i]; /* next row to be added to k_th row */ 935 936 /* compute multiplier */ 937 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 938 939 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 940 diag = ba + i*4; 941 u = ba + ili*4; 942 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 943 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 944 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 945 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 946 947 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 948 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 949 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 950 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 951 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 952 953 ierr = PetscLogFlops(16.0*2.0);CHKERRQ(ierr); 954 955 /* update -U(i,k): ba[ili] = uik */ 956 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 957 958 /* add multiple of row i to k-th row ... */ 959 jmin = ili + 1; jmax = bi[i+1]; 960 if (jmin < jmax){ 961 for (j=jmin; j<jmax; j++) { 962 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 963 rtmp_ptr = rtmp + bj[j]*4; 964 u = ba + j*4; 965 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 966 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 967 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 968 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 969 } 970 ierr = PetscLogFlops(16.0*(jmax-jmin));CHKERRQ(ierr); 971 972 /* ... add i to row list for next nonzero entry */ 973 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 974 j = bj[jmin]; 975 jl[i] = jl[j]; jl[j] = i; /* update jl */ 976 } 977 i = nexti; 978 } 979 980 /* save nonzero entries in k-th row of U ... */ 981 982 /* invert diagonal block */ 983 diag = ba+k*4; 984 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 985 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 986 987 jmin = bi[k]; jmax = bi[k+1]; 988 if (jmin < jmax) { 989 for (j=jmin; j<jmax; j++){ 990 vj = bj[j]; /* block col. index of U */ 991 u = ba + j*4; 992 rtmp_ptr = rtmp + vj*4; 993 for (k1=0; k1<4; k1++){ 994 *u++ = *rtmp_ptr; 995 *rtmp_ptr++ = 0.0; 996 } 997 } 998 999 /* ... add k to row list for first nonzero entry in k-th row */ 1000 il[k] = jmin; 1001 i = bj[jmin]; 1002 jl[k] = jl[i]; jl[i] = k; 1003 } 1004 } 1005 1006 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1007 ierr = PetscFree2(il,jl);CHKERRQ(ierr); 1008 if (a->permute) { 1009 ierr = PetscFree(aa);CHKERRQ(ierr); 1010 } 1011 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 1012 C->ops->solve = MatSolve_SeqSBAIJ_2_inplace; 1013 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace; 1014 C->assembled = PETSC_TRUE; 1015 C->preallocated = PETSC_TRUE; 1016 ierr = PetscLogFlops(1.3333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 1017 PetscFunctionReturn(0); 1018 } 1019 1020 /* 1021 Version for when blocks are 2 by 2 Using natural ordering 1022 */ 1023 #undef __FUNCT__ 1024 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 1025 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info) 1026 { 1027 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 1028 PetscErrorCode ierr; 1029 PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 1030 PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 1031 MatScalar *ba = b->a,*aa,*ap,dk[8],uik[8]; 1032 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 1033 PetscReal shift = info->shiftamount; 1034 1035 PetscFunctionBegin; 1036 /* initialization */ 1037 /* il and jl record the first nonzero element in each row of the accessing 1038 window U(0:k, k:mbs-1). 1039 jl: list of rows to be added to uneliminated rows 1040 i>= k: jl(i) is the first row to be added to row i 1041 i< k: jl(i) is the row following row i in some list of rows 1042 jl(i) = mbs indicates the end of a list 1043 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1044 row i of U */ 1045 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1046 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1047 ierr = PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 1048 for (i=0; i<mbs; i++) { 1049 jl[i] = mbs; il[0] = 0; 1050 } 1051 ai = a->i; aj = a->j; aa = a->a; 1052 1053 /* for each row k */ 1054 for (k = 0; k<mbs; k++){ 1055 1056 /*initialize k-th row with elements nonzero in row k of A */ 1057 jmin = ai[k]; jmax = ai[k+1]; 1058 ap = aa + jmin*4; 1059 for (j = jmin; j < jmax; j++){ 1060 vj = aj[j]; /* block col. index */ 1061 rtmp_ptr = rtmp + vj*4; 1062 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 1063 } 1064 1065 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1066 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 1067 i = jl[k]; /* first row to be added to k_th row */ 1068 1069 while (i < k){ 1070 nexti = jl[i]; /* next row to be added to k_th row */ 1071 1072 /* compute multiplier */ 1073 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1074 1075 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 1076 diag = ba + i*4; 1077 u = ba + ili*4; 1078 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 1079 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 1080 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 1081 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 1082 1083 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 1084 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 1085 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 1086 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 1087 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 1088 1089 ierr = PetscLogFlops(16.0*2.0);CHKERRQ(ierr); 1090 1091 /* update -U(i,k): ba[ili] = uik */ 1092 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 1093 1094 /* add multiple of row i to k-th row ... */ 1095 jmin = ili + 1; jmax = bi[i+1]; 1096 if (jmin < jmax){ 1097 for (j=jmin; j<jmax; j++) { 1098 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 1099 rtmp_ptr = rtmp + bj[j]*4; 1100 u = ba + j*4; 1101 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 1102 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 1103 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 1104 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 1105 } 1106 ierr = PetscLogFlops(16.0*(jmax-jmin));CHKERRQ(ierr); 1107 1108 /* ... add i to row list for next nonzero entry */ 1109 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1110 j = bj[jmin]; 1111 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1112 } 1113 i = nexti; 1114 } 1115 1116 /* save nonzero entries in k-th row of U ... */ 1117 1118 /* invert diagonal block */ 1119 diag = ba+k*4; 1120 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 1121 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 1122 1123 jmin = bi[k]; jmax = bi[k+1]; 1124 if (jmin < jmax) { 1125 for (j=jmin; j<jmax; j++){ 1126 vj = bj[j]; /* block col. index of U */ 1127 u = ba + j*4; 1128 rtmp_ptr = rtmp + vj*4; 1129 for (k1=0; k1<4; k1++){ 1130 *u++ = *rtmp_ptr; 1131 *rtmp_ptr++ = 0.0; 1132 } 1133 } 1134 1135 /* ... add k to row list for first nonzero entry in k-th row */ 1136 il[k] = jmin; 1137 i = bj[jmin]; 1138 jl[k] = jl[i]; jl[i] = k; 1139 } 1140 } 1141 1142 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1143 ierr = PetscFree2(il,jl);CHKERRQ(ierr); 1144 1145 C->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1146 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1147 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1148 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1149 C->assembled = PETSC_TRUE; 1150 C->preallocated = PETSC_TRUE; 1151 ierr = PetscLogFlops(1.3333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 1152 PetscFunctionReturn(0); 1153 } 1154 1155 /* 1156 Numeric U^T*D*U factorization for SBAIJ format. 1157 Version for blocks are 1 by 1. 1158 */ 1159 #undef __FUNCT__ 1160 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace" 1161 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C,Mat A,const MatFactorInfo *info) 1162 { 1163 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data; 1164 IS ip=b->row; 1165 PetscErrorCode ierr; 1166 const PetscInt *ai,*aj,*rip; 1167 PetscInt *a2anew,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j,*bcol; 1168 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1169 MatScalar *rtmp,*ba=b->a,*bval,*aa,dk,uikdi; 1170 PetscReal rs; 1171 FactorShiftCtx sctx; 1172 1173 PetscFunctionBegin; 1174 /* MatPivotSetUp(): initialize shift context sctx */ 1175 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 1176 1177 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1178 if (!a->permute){ 1179 ai = a->i; aj = a->j; aa = a->a; 1180 } else { 1181 ai = a->inew; aj = a->jnew; 1182 nz = ai[mbs]; 1183 ierr = PetscMalloc(nz*sizeof(MatScalar),&aa);CHKERRQ(ierr); 1184 a2anew = a->a2anew; 1185 bval = a->a; 1186 for (j=0; j<nz; j++){ 1187 aa[a2anew[j]] = *(bval++); 1188 } 1189 } 1190 1191 /* initialization */ 1192 /* il and jl record the first nonzero element in each row of the accessing 1193 window U(0:k, k:mbs-1). 1194 jl: list of rows to be added to uneliminated rows 1195 i>= k: jl(i) is the first row to be added to row i 1196 i< k: jl(i) is the row following row i in some list of rows 1197 jl(i) = mbs indicates the end of a list 1198 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1199 row i of U */ 1200 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 1201 1202 do { 1203 sctx.newshift = PETSC_FALSE; 1204 for (i=0; i<mbs; i++) { 1205 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1206 } 1207 1208 for (k = 0; k<mbs; k++){ 1209 /*initialize k-th row by the perm[k]-th row of A */ 1210 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1211 bval = ba + bi[k]; 1212 for (j = jmin; j < jmax; j++){ 1213 col = rip[aj[j]]; 1214 rtmp[col] = aa[j]; 1215 *bval++ = 0.0; /* for in-place factorization */ 1216 } 1217 1218 /* shift the diagonal of the matrix */ 1219 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1220 1221 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1222 dk = rtmp[k]; 1223 i = jl[k]; /* first row to be added to k_th row */ 1224 1225 while (i < k){ 1226 nexti = jl[i]; /* next row to be added to k_th row */ 1227 1228 /* compute multiplier, update diag(k) and U(i,k) */ 1229 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1230 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1231 dk += uikdi*ba[ili]; 1232 ba[ili] = uikdi; /* -U(i,k) */ 1233 1234 /* add multiple of row i to k-th row */ 1235 jmin = ili + 1; jmax = bi[i+1]; 1236 if (jmin < jmax){ 1237 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1238 ierr = PetscLogFlops(2.0*(jmax-jmin));CHKERRQ(ierr); 1239 1240 /* update il and jl for row i */ 1241 il[i] = jmin; 1242 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1243 } 1244 i = nexti; 1245 } 1246 1247 /* shift the diagonals when zero pivot is detected */ 1248 /* compute rs=sum of abs(off-diagonal) */ 1249 rs = 0.0; 1250 jmin = bi[k]+1; 1251 nz = bi[k+1] - jmin; 1252 if (nz){ 1253 bcol = bj + jmin; 1254 while (nz--){ 1255 rs += PetscAbsScalar(rtmp[*bcol]); 1256 bcol++; 1257 } 1258 } 1259 1260 sctx.rs = rs; 1261 sctx.pv = dk; 1262 ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr); 1263 if (sctx.newshift) break; /* sctx.shift_amount is updated */ 1264 dk = sctx.pv; 1265 1266 /* copy data into U(k,:) */ 1267 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1268 jmin = bi[k]+1; jmax = bi[k+1]; 1269 if (jmin < jmax) { 1270 for (j=jmin; j<jmax; j++){ 1271 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1272 } 1273 /* add the k-th row into il and jl */ 1274 il[k] = jmin; 1275 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1276 } 1277 } 1278 } while (sctx.newshift); 1279 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 1280 if (a->permute){ierr = PetscFree(aa);CHKERRQ(ierr);} 1281 1282 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1283 C->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 1284 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace; 1285 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 1286 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 1287 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 1288 C->assembled = PETSC_TRUE; 1289 C->preallocated = PETSC_TRUE; 1290 ierr = PetscLogFlops(C->rmap->N);CHKERRQ(ierr); 1291 if (sctx.nshift){ 1292 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1293 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1294 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1295 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1296 } 1297 } 1298 PetscFunctionReturn(0); 1299 } 1300 1301 /* 1302 Version for when blocks are 1 by 1 Using natural ordering under new datastructure 1303 Modified from MatCholeskyFactorNumeric_SeqAIJ() 1304 */ 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 1307 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info) 1308 { 1309 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 1310 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)B->data; 1311 PetscErrorCode ierr; 1312 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 1313 PetscInt *ai=a->i,*aj=a->j,*ajtmp; 1314 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 1315 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1316 FactorShiftCtx sctx; 1317 PetscReal rs; 1318 MatScalar d,*v; 1319 1320 PetscFunctionBegin; 1321 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr); 1322 1323 /* MatPivotSetUp(): initialize shift context sctx */ 1324 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 1325 1326 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 1327 sctx.shift_top = info->zeropivot; 1328 ierr = PetscMemzero(rtmp,mbs*sizeof(MatScalar));CHKERRQ(ierr); 1329 for (i=0; i<mbs; i++) { 1330 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1331 d = (aa)[a->diag[i]]; 1332 rtmp[i] += - PetscRealPart(d); /* diagonal entry */ 1333 ajtmp = aj + ai[i] + 1; /* exclude diagonal */ 1334 v = aa + ai[i] + 1; 1335 nz = ai[i+1] - ai[i] - 1 ; 1336 for (j=0; j<nz; j++){ 1337 rtmp[i] += PetscAbsScalar(v[j]); 1338 rtmp[ajtmp[j]] += PetscAbsScalar(v[j]); 1339 } 1340 if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]); 1341 } 1342 sctx.shift_top *= 1.1; 1343 sctx.nshift_max = 5; 1344 sctx.shift_lo = 0.; 1345 sctx.shift_hi = 1.; 1346 } 1347 1348 /* allocate working arrays 1349 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 1350 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 1351 */ 1352 do { 1353 sctx.newshift = PETSC_FALSE; 1354 1355 for (i=0; i<mbs; i++) c2r[i] = mbs; 1356 il[0] = 0; 1357 1358 for (k = 0; k<mbs; k++){ 1359 /* zero rtmp */ 1360 nz = bi[k+1] - bi[k]; 1361 bjtmp = bj + bi[k]; 1362 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 1363 1364 /* load in initial unfactored row */ 1365 bval = ba + bi[k]; 1366 jmin = ai[k]; jmax = ai[k+1]; 1367 for (j = jmin; j < jmax; j++){ 1368 col = aj[j]; 1369 rtmp[col] = aa[j]; 1370 *bval++ = 0.0; /* for in-place factorization */ 1371 } 1372 /* shift the diagonal of the matrix: ZeropivotApply() */ 1373 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 1374 1375 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1376 dk = rtmp[k]; 1377 i = c2r[k]; /* first row to be added to k_th row */ 1378 1379 while (i < k){ 1380 nexti = c2r[i]; /* next row to be added to k_th row */ 1381 1382 /* compute multiplier, update diag(k) and U(i,k) */ 1383 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1384 uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 1385 dk += uikdi*ba[ili]; /* update diag[k] */ 1386 ba[ili] = uikdi; /* -U(i,k) */ 1387 1388 /* add multiple of row i to k-th row */ 1389 jmin = ili + 1; jmax = bi[i+1]; 1390 if (jmin < jmax){ 1391 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1392 /* update il and c2r for row i */ 1393 il[i] = jmin; 1394 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 1395 } 1396 i = nexti; 1397 } 1398 1399 /* copy data into U(k,:) */ 1400 rs = 0.0; 1401 jmin = bi[k]; jmax = bi[k+1]-1; 1402 if (jmin < jmax) { 1403 for (j=jmin; j<jmax; j++){ 1404 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 1405 } 1406 /* add the k-th row into il and c2r */ 1407 il[k] = jmin; 1408 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 1409 } 1410 1411 sctx.rs = rs; 1412 sctx.pv = dk; 1413 ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr); 1414 if(sctx.newshift) break; 1415 dk = sctx.pv; 1416 1417 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 1418 } 1419 } while (sctx.newshift); 1420 1421 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 1422 1423 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1424 B->ops->solves = MatSolves_SeqSBAIJ_1; 1425 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1426 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1427 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1428 1429 B->assembled = PETSC_TRUE; 1430 B->preallocated = PETSC_TRUE; 1431 ierr = PetscLogFlops(B->rmap->n);CHKERRQ(ierr); 1432 1433 /* MatPivotView() */ 1434 if (sctx.nshift){ 1435 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1436 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 1437 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1438 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1439 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){ 1440 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 1441 } 1442 } 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace" 1448 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C,Mat A,const MatFactorInfo *info) 1449 { 1450 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data; 1451 PetscErrorCode ierr; 1452 PetscInt i,j,mbs = a->mbs; 1453 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 1454 PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz; 1455 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 1456 PetscReal rs; 1457 FactorShiftCtx sctx; 1458 1459 PetscFunctionBegin; 1460 /* MatPivotSetUp(): initialize shift context sctx */ 1461 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 1462 1463 /* initialization */ 1464 /* il and jl record the first nonzero element in each row of the accessing 1465 window U(0:k, k:mbs-1). 1466 jl: list of rows to be added to uneliminated rows 1467 i>= k: jl(i) is the first row to be added to row i 1468 i< k: jl(i) is the row following row i in some list of rows 1469 jl(i) = mbs indicates the end of a list 1470 il(i): points to the first nonzero element in U(i,k:mbs-1) 1471 */ 1472 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1473 ierr = PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 1474 1475 do { 1476 sctx.newshift = PETSC_FALSE; 1477 for (i=0; i<mbs; i++) { 1478 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1479 } 1480 1481 for (k = 0; k<mbs; k++){ 1482 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1483 nz = ai[k+1] - ai[k]; 1484 acol = aj + ai[k]; 1485 aval = aa + ai[k]; 1486 bval = ba + bi[k]; 1487 while (nz -- ){ 1488 rtmp[*acol++] = *aval++; 1489 *bval++ = 0.0; /* for in-place factorization */ 1490 } 1491 1492 /* shift the diagonal of the matrix */ 1493 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1494 1495 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1496 dk = rtmp[k]; 1497 i = jl[k]; /* first row to be added to k_th row */ 1498 1499 while (i < k){ 1500 nexti = jl[i]; /* next row to be added to k_th row */ 1501 /* compute multiplier, update D(k) and U(i,k) */ 1502 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1503 uikdi = - ba[ili]*ba[bi[i]]; 1504 dk += uikdi*ba[ili]; 1505 ba[ili] = uikdi; /* -U(i,k) */ 1506 1507 /* add multiple of row i to k-th row ... */ 1508 jmin = ili + 1; 1509 nz = bi[i+1] - jmin; 1510 if (nz > 0){ 1511 bcol = bj + jmin; 1512 bval = ba + jmin; 1513 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 1514 while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 1515 1516 /* update il and jl for i-th row */ 1517 il[i] = jmin; 1518 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1519 } 1520 i = nexti; 1521 } 1522 1523 /* shift the diagonals when zero pivot is detected */ 1524 /* compute rs=sum of abs(off-diagonal) */ 1525 rs = 0.0; 1526 jmin = bi[k]+1; 1527 nz = bi[k+1] - jmin; 1528 if (nz){ 1529 bcol = bj + jmin; 1530 while (nz--){ 1531 rs += PetscAbsScalar(rtmp[*bcol]); 1532 bcol++; 1533 } 1534 } 1535 1536 sctx.rs = rs; 1537 sctx.pv = dk; 1538 ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr); 1539 if (sctx.newshift) break; /* sctx.shift_amount is updated */ 1540 dk = sctx.pv; 1541 1542 /* copy data into U(k,:) */ 1543 ba[bi[k]] = 1.0/dk; 1544 jmin = bi[k]+1; 1545 nz = bi[k+1] - jmin; 1546 if (nz){ 1547 bcol = bj + jmin; 1548 bval = ba + jmin; 1549 while (nz--){ 1550 *bval++ = rtmp[*bcol]; 1551 rtmp[*bcol++] = 0.0; 1552 } 1553 /* add k-th row into il and jl */ 1554 il[k] = jmin; 1555 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1556 } 1557 } /* end of for (k = 0; k<mbs; k++) */ 1558 } while (sctx.newshift); 1559 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1560 ierr = PetscFree2(il,jl);CHKERRQ(ierr); 1561 1562 C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1563 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace; 1564 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1565 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1566 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1567 1568 C->assembled = PETSC_TRUE; 1569 C->preallocated = PETSC_TRUE; 1570 ierr = PetscLogFlops(C->rmap->N);CHKERRQ(ierr); 1571 if (sctx.nshift){ 1572 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1573 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1574 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1575 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1576 } 1577 } 1578 PetscFunctionReturn(0); 1579 } 1580 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ" 1583 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,const MatFactorInfo *info) 1584 { 1585 PetscErrorCode ierr; 1586 Mat C; 1587 1588 PetscFunctionBegin; 1589 ierr = MatGetFactor(A,"petsc",MAT_FACTOR_CHOLESKY,&C);CHKERRQ(ierr); 1590 ierr = MatCholeskyFactorSymbolic(C,A,perm,info);CHKERRQ(ierr); 1591 ierr = MatCholeskyFactorNumeric(C,A,info);CHKERRQ(ierr); 1592 A->ops->solve = C->ops->solve; 1593 A->ops->solvetranspose = C->ops->solvetranspose; 1594 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1595 PetscFunctionReturn(0); 1596 } 1597 1598 1599