1 2 #include "src/mat/impls/baij/seq/baij.h" 3 #include "src/mat/impls/sbaij/seq/sbaij.h" 4 #include "src/inline/ilu.h" 5 #include "include/petscis.h" 6 7 #if !defined(PETSC_USE_COMPLEX) 8 /* 9 input: 10 F -- numeric factor 11 output: 12 nneg, nzero, npos: matrix inertia 13 */ 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatGetInertia_SeqSBAIJ" 17 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,PetscInt *nneig,PetscInt *nzero,PetscInt *npos) 18 { 19 Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data; 20 PetscScalar *dd = fact_ptr->a; 21 PetscInt mbs=fact_ptr->mbs,bs=F->bs,i,nneig_tmp,npos_tmp,*fi = fact_ptr->i; 22 23 PetscFunctionBegin; 24 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"No support for bs: %D >1 yet",bs); 25 nneig_tmp = 0; npos_tmp = 0; 26 for (i=0; i<mbs; i++){ 27 if (PetscRealPart(dd[*fi]) > 0.0){ 28 npos_tmp++; 29 } else if (PetscRealPart(dd[*fi]) < 0.0){ 30 nneig_tmp++; 31 } 32 fi++; 33 } 34 if (nneig) *nneig = nneig_tmp; 35 if (npos) *npos = npos_tmp; 36 if (nzero) *nzero = mbs - nneig_tmp - npos_tmp; 37 38 PetscFunctionReturn(0); 39 } 40 #endif /* !defined(PETSC_USE_COMPLEX) */ 41 42 /* 43 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 44 Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad. 45 */ 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ_MSR" 48 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B) 49 { 50 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 51 PetscErrorCode ierr; 52 PetscInt *rip,i,mbs = a->mbs,*ai,*aj; 53 PetscInt *jutmp,bs = A->bs,bs2=a->bs2; 54 PetscInt m,reallocs = 0,prow; 55 PetscInt *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 56 PetscReal f = info->fill; 57 PetscTruth perm_identity; 58 59 PetscFunctionBegin; 60 /* check whether perm is the identity mapping */ 61 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 62 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 63 64 if (perm_identity){ /* without permutation */ 65 a->permute = PETSC_FALSE; 66 ai = a->i; aj = a->j; 67 } else { /* non-trivial permutation */ 68 a->permute = PETSC_TRUE; 69 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 70 ai = a->inew; aj = a->jnew; 71 } 72 73 /* initialization */ 74 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);CHKERRQ(ierr); 75 umax = (PetscInt)(f*ai[mbs] + 1); umax += mbs + 1; 76 ierr = PetscMalloc(umax*sizeof(PetscInt),&ju);CHKERRQ(ierr); 77 iu[0] = mbs+1; 78 juidx = mbs + 1; /* index for ju */ 79 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&jl);CHKERRQ(ierr); /* linked list for pivot row */ 80 q = jl + mbs; /* linked list for col index */ 81 for (i=0; i<mbs; i++){ 82 jl[i] = mbs; 83 q[i] = 0; 84 } 85 86 /* for each row k */ 87 for (k=0; k<mbs; k++){ 88 for (i=0; i<mbs; i++) q[i] = 0; /* to be removed! */ 89 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 90 q[k] = mbs; 91 /* initialize nonzero structure of k-th row to row rip[k] of A */ 92 jmin = ai[rip[k]] +1; /* exclude diag[k] */ 93 jmax = ai[rip[k]+1]; 94 for (j=jmin; j<jmax; j++){ 95 vj = rip[aj[j]]; /* col. value */ 96 if(vj > k){ 97 qm = k; 98 do { 99 m = qm; qm = q[m]; 100 } while(qm < vj); 101 if (qm == vj) { 102 SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n"); 103 } 104 nzk++; 105 q[m] = vj; 106 q[vj] = qm; 107 } /* if(vj > k) */ 108 } /* for (j=jmin; j<jmax; j++) */ 109 110 /* modify nonzero structure of k-th row by computing fill-in 111 for each row i to be merged in */ 112 prow = k; 113 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 114 115 while (prow < k){ 116 /* merge row prow into k-th row */ 117 jmin = iu[prow] + 1; jmax = iu[prow+1]; 118 qm = k; 119 for (j=jmin; j<jmax; j++){ 120 vj = ju[j]; 121 do { 122 m = qm; qm = q[m]; 123 } while (qm < vj); 124 if (qm != vj){ 125 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 126 } 127 } 128 prow = jl[prow]; /* next pivot row */ 129 } 130 131 /* add k to row list for first nonzero element in k-th row */ 132 if (nzk > 0){ 133 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 134 jl[k] = jl[i]; jl[i] = k; 135 } 136 iu[k+1] = iu[k] + nzk; 137 138 /* allocate more space to ju if needed */ 139 if (iu[k+1] > umax) { 140 /* estimate how much additional space we will need */ 141 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 142 /* just double the memory each time */ 143 maxadd = umax; 144 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 145 umax += maxadd; 146 147 /* allocate a longer ju */ 148 ierr = PetscMalloc(umax*sizeof(PetscInt),&jutmp);CHKERRQ(ierr); 149 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));CHKERRQ(ierr); 150 ierr = PetscFree(ju);CHKERRQ(ierr); 151 ju = jutmp; 152 reallocs++; /* count how many times we realloc */ 153 } 154 155 /* save nonzero structure of k-th row in ju */ 156 i=k; 157 while (nzk --) { 158 i = q[i]; 159 ju[juidx++] = i; 160 } 161 } 162 163 #if defined(PETSC_USE_DEBUG) 164 if (ai[mbs] != 0) { 165 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 166 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 167 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 168 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af));CHKERRQ(ierr); 169 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"));CHKERRQ(ierr); 170 } else { 171 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"));CHKERRQ(ierr); 172 } 173 #endif 174 175 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 176 ierr = PetscFree(jl);CHKERRQ(ierr); 177 178 /* put together the new matrix */ 179 ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr); 180 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 181 ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 182 183 /* ierr = PetscLogObjectParent(*B,iperm);CHKERRQ(ierr); */ 184 b = (Mat_SeqSBAIJ*)(*B)->data; 185 ierr = PetscFree(b->imax);CHKERRQ(ierr); 186 b->singlemalloc = PETSC_FALSE; 187 /* the next line frees the default space generated by the Create() */ 188 ierr = PetscFree(b->a);CHKERRQ(ierr); 189 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 190 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 191 b->j = ju; 192 b->i = iu; 193 b->diag = 0; 194 b->ilen = 0; 195 b->imax = 0; 196 b->row = perm; 197 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 198 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 199 b->icol = perm; 200 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 201 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 202 /* In b structure: Free imax, ilen, old a, old j. 203 Allocate idnew, solve_work, new a, new j */ 204 ierr = PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 205 b->maxnz = b->nz = iu[mbs]; 206 207 (*B)->factor = FACTOR_CHOLESKY; 208 (*B)->info.factor_mallocs = reallocs; 209 (*B)->info.fill_ratio_given = f; 210 if (ai[mbs] != 0) { 211 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 212 } else { 213 (*B)->info.fill_ratio_needed = 0.0; 214 } 215 216 if (perm_identity){ 217 switch (bs) { 218 case 1: 219 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 220 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 221 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"));CHKERRQ(ierr); 222 break; 223 case 2: 224 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 225 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 226 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr); 227 break; 228 case 3: 229 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 230 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 231 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr); 232 break; 233 case 4: 234 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 235 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 236 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr); 237 break; 238 case 5: 239 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 240 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 241 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr); 242 break; 243 case 6: 244 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 245 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 246 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr); 247 break; 248 case 7: 249 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 250 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 251 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr); 252 break; 253 default: 254 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 255 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 256 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"));CHKERRQ(ierr); 257 break; 258 } 259 } 260 PetscFunctionReturn(0); 261 } 262 /* 263 Symbolic U^T*D*U factorization for SBAIJ format. 264 */ 265 #include "petscbt.h" 266 #include "src/mat/utils/freespace.h" 267 #undef __FUNCT__ 268 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 269 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 270 { 271 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 272 Mat_SeqSBAIJ *b; 273 Mat B; 274 PetscErrorCode ierr; 275 PetscTruth perm_identity; 276 PetscReal fill = info->fill; 277 PetscInt *rip,i,mbs=a->mbs,bs=A->bs,*ai,*aj,reallocs=0,prow; 278 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 279 PetscInt nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr; 280 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 281 PetscBT lnkbt; 282 283 PetscFunctionBegin; 284 /* 285 This code originally uses Modified Sparse Row (MSR) storage 286 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise! 287 Then it is rewritten so the factor B takes seqsbaij format. However the associated 288 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity, 289 thus the original code in MSR format is still used for these cases. 290 The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever 291 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 292 */ 293 if (bs > 1){ 294 ierr = MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);CHKERRQ(ierr); 295 PetscFunctionReturn(0); 296 } 297 298 /* check whether perm is the identity mapping */ 299 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 300 301 if (perm_identity){ 302 a->permute = PETSC_FALSE; 303 ai = a->i; aj = a->j; 304 } else { 305 a->permute = PETSC_TRUE; 306 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 307 ai = a->inew; aj = a->jnew; 308 } 309 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 310 311 /* initialization */ 312 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 313 ui[0] = 0; 314 315 /* jl: linked list for storing indices of the pivot rows 316 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 317 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 318 il = jl + mbs; 319 cols = il + mbs; 320 ui_ptr = (PetscInt**)(cols + mbs); 321 322 for (i=0; i<mbs; i++){ 323 jl[i] = mbs; il[i] = 0; 324 } 325 326 /* create and initialize a linked list for storing column indices of the active row k */ 327 nlnk = mbs + 1; 328 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 329 330 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 331 ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 332 current_space = free_space; 333 334 for (k=0; k<mbs; k++){ /* for each active row k */ 335 /* initialize lnk by the column indices of row rip[k] of A */ 336 nzk = 0; 337 ncols = ai[rip[k]+1] - ai[rip[k]]; 338 for (j=0; j<ncols; j++){ 339 i = *(aj + ai[rip[k]] + j); 340 cols[j] = rip[i]; 341 } 342 ierr = PetscLLAdd(ncols,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 343 nzk += nlnk; 344 345 /* update lnk by computing fill-in for each pivot row to be merged in */ 346 prow = jl[k]; /* 1st pivot row */ 347 348 while (prow < k){ 349 nextprow = jl[prow]; 350 /* merge prow into k-th row */ 351 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 352 jmax = ui[prow+1]; 353 ncols = jmax-jmin; 354 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 355 ierr = PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 356 nzk += nlnk; 357 358 /* update il and jl for prow */ 359 if (jmin < jmax){ 360 il[prow] = jmin; 361 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 362 } 363 prow = nextprow; 364 } 365 366 /* if free space is not available, make more free space */ 367 if (current_space->local_remaining<nzk) { 368 i = mbs - k + 1; /* num of unfactored rows */ 369 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 370 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 371 reallocs++; 372 } 373 374 /* copy data into free space, then initialize lnk */ 375 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 376 377 /* add the k-th row into il and jl */ 378 if (nzk-1 > 0){ 379 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 380 jl[k] = jl[i]; jl[i] = k; 381 il[k] = ui[k] + 1; 382 } 383 ui_ptr[k] = current_space->array; 384 current_space->array += nzk; 385 current_space->local_used += nzk; 386 current_space->local_remaining -= nzk; 387 388 ui[k+1] = ui[k] + nzk; 389 } 390 391 #if defined(PETSC_USE_DEBUG) 392 if (ai[mbs] != 0) { 393 PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 394 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 395 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 396 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 397 } else { 398 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"));CHKERRQ(ierr); 399 } 400 #endif 401 402 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 403 ierr = PetscFree(jl);CHKERRQ(ierr); 404 405 /* destroy list of free space and other temporary array(s) */ 406 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 407 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 408 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 409 410 /* put together the new matrix in MATSEQSBAIJ format */ 411 ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr); 412 B = *fact; 413 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 414 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,PETSC_NULL);CHKERRQ(ierr); 415 416 b = (Mat_SeqSBAIJ*)B->data; 417 ierr = PetscFree(b->imax);CHKERRQ(ierr); 418 b->singlemalloc = PETSC_FALSE; 419 /* the next line frees the default space generated by the Create() */ 420 ierr = PetscFree(b->a);CHKERRQ(ierr); 421 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 422 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 423 b->j = uj; 424 b->i = ui; 425 b->diag = 0; 426 b->ilen = 0; 427 b->imax = 0; 428 b->row = perm; 429 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 430 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 431 b->icol = perm; 432 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 433 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 434 ierr = PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 435 b->maxnz = b->nz = ui[mbs]; 436 437 B->factor = FACTOR_CHOLESKY; 438 B->info.factor_mallocs = reallocs; 439 B->info.fill_ratio_given = fill; 440 if (ai[mbs] != 0) { 441 B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 442 } else { 443 B->info.fill_ratio_needed = 0.0; 444 } 445 446 if (perm_identity){ 447 switch (bs) { 448 case 1: 449 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 450 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 451 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"));CHKERRQ(ierr); 452 break; 453 case 2: 454 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 455 B->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 456 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr); 457 break; 458 case 3: 459 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 460 B->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 461 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr); 462 break; 463 case 4: 464 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 465 B->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 466 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr); 467 break; 468 case 5: 469 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 470 B->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 471 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr); 472 break; 473 case 6: 474 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 475 B->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 476 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr); 477 break; 478 case 7: 479 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 480 B->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 481 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr); 482 break; 483 default: 484 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 485 B->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 486 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"));CHKERRQ(ierr); 487 break; 488 } 489 } 490 PetscFunctionReturn(0); 491 } 492 #undef __FUNCT__ 493 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 494 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,MatFactorInfo *info,Mat *B) 495 { 496 Mat C = *B; 497 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 498 IS perm = b->row; 499 PetscErrorCode ierr; 500 PetscInt *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 501 PetscInt *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 502 PetscInt bs=A->bs,bs2 = a->bs2; 503 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 504 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 505 MatScalar *work; 506 PetscInt *pivots; 507 508 PetscFunctionBegin; 509 /* initialization */ 510 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 511 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 512 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 513 jl = il + mbs; 514 for (i=0; i<mbs; i++) { 515 jl[i] = mbs; il[0] = 0; 516 } 517 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 518 uik = dk + bs2; 519 work = uik + bs2; 520 ierr = PetscMalloc(bs*sizeof(PetscInt),&pivots);CHKERRQ(ierr); 521 522 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 523 524 /* check permutation */ 525 if (!a->permute){ 526 ai = a->i; aj = a->j; aa = a->a; 527 } else { 528 ai = a->inew; aj = a->jnew; 529 ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 530 ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 531 ierr = PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);CHKERRQ(ierr); 532 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr); 533 534 for (i=0; i<mbs; i++){ 535 jmin = ai[i]; jmax = ai[i+1]; 536 for (j=jmin; j<jmax; j++){ 537 while (a2anew[j] != j){ 538 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 539 for (k1=0; k1<bs2; k1++){ 540 dk[k1] = aa[k*bs2+k1]; 541 aa[k*bs2+k1] = aa[j*bs2+k1]; 542 aa[j*bs2+k1] = dk[k1]; 543 } 544 } 545 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 546 if (i > aj[j]){ 547 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 548 ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 549 for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 550 for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 551 for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 552 } 553 } 554 } 555 } 556 ierr = PetscFree(a2anew);CHKERRQ(ierr); 557 } 558 559 /* for each row k */ 560 for (k = 0; k<mbs; k++){ 561 562 /*initialize k-th row with elements nonzero in row perm(k) of A */ 563 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 564 565 ap = aa + jmin*bs2; 566 for (j = jmin; j < jmax; j++){ 567 vj = perm_ptr[aj[j]]; /* block col. index */ 568 rtmp_ptr = rtmp + vj*bs2; 569 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 570 } 571 572 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 573 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 574 i = jl[k]; /* first row to be added to k_th row */ 575 576 while (i < k){ 577 nexti = jl[i]; /* next row to be added to k_th row */ 578 579 /* compute multiplier */ 580 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 581 582 /* uik = -inv(Di)*U_bar(i,k) */ 583 diag = ba + i*bs2; 584 u = ba + ili*bs2; 585 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 586 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 587 588 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 589 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 590 591 /* update -U(i,k) */ 592 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 593 594 /* add multiple of row i to k-th row ... */ 595 jmin = ili + 1; jmax = bi[i+1]; 596 if (jmin < jmax){ 597 for (j=jmin; j<jmax; j++) { 598 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 599 rtmp_ptr = rtmp + bj[j]*bs2; 600 u = ba + j*bs2; 601 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 602 } 603 604 /* ... add i to row list for next nonzero entry */ 605 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 606 j = bj[jmin]; 607 jl[i] = jl[j]; jl[j] = i; /* update jl */ 608 } 609 i = nexti; 610 } 611 612 /* save nonzero entries in k-th row of U ... */ 613 614 /* invert diagonal block */ 615 diag = ba+k*bs2; 616 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 617 ierr = Kernel_A_gets_inverse_A(bs,diag,pivots,work);CHKERRQ(ierr); 618 619 jmin = bi[k]; jmax = bi[k+1]; 620 if (jmin < jmax) { 621 for (j=jmin; j<jmax; j++){ 622 vj = bj[j]; /* block col. index of U */ 623 u = ba + j*bs2; 624 rtmp_ptr = rtmp + vj*bs2; 625 for (k1=0; k1<bs2; k1++){ 626 *u++ = *rtmp_ptr; 627 *rtmp_ptr++ = 0.0; 628 } 629 } 630 631 /* ... add k to row list for first nonzero entry in k-th row */ 632 il[k] = jmin; 633 i = bj[jmin]; 634 jl[k] = jl[i]; jl[i] = k; 635 } 636 } 637 638 ierr = PetscFree(rtmp);CHKERRQ(ierr); 639 ierr = PetscFree(il);CHKERRQ(ierr); 640 ierr = PetscFree(dk);CHKERRQ(ierr); 641 ierr = PetscFree(pivots);CHKERRQ(ierr); 642 if (a->permute){ 643 ierr = PetscFree(aa);CHKERRQ(ierr); 644 } 645 646 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 647 C->factor = FACTOR_CHOLESKY; 648 C->assembled = PETSC_TRUE; 649 C->preallocated = PETSC_TRUE; 650 ierr = PetscLogFlops(1.3333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 651 PetscFunctionReturn(0); 652 } 653 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 656 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,MatFactorInfo *info,Mat *B) 657 { 658 Mat C = *B; 659 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 660 PetscErrorCode ierr; 661 PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 662 PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 663 PetscInt bs=A->bs,bs2 = a->bs2; 664 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 665 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 666 MatScalar *work; 667 PetscInt *pivots; 668 669 PetscFunctionBegin; 670 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 671 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 672 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 673 jl = il + mbs; 674 for (i=0; i<mbs; i++) { 675 jl[i] = mbs; il[0] = 0; 676 } 677 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 678 uik = dk + bs2; 679 work = uik + bs2; 680 ierr = PetscMalloc(bs*sizeof(PetscInt),&pivots);CHKERRQ(ierr); 681 682 ai = a->i; aj = a->j; aa = a->a; 683 684 /* for each row k */ 685 for (k = 0; k<mbs; k++){ 686 687 /*initialize k-th row with elements nonzero in row k of A */ 688 jmin = ai[k]; jmax = ai[k+1]; 689 ap = aa + jmin*bs2; 690 for (j = jmin; j < jmax; j++){ 691 vj = aj[j]; /* block col. index */ 692 rtmp_ptr = rtmp + vj*bs2; 693 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 694 } 695 696 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 697 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 698 i = jl[k]; /* first row to be added to k_th row */ 699 700 while (i < k){ 701 nexti = jl[i]; /* next row to be added to k_th row */ 702 703 /* compute multiplier */ 704 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 705 706 /* uik = -inv(Di)*U_bar(i,k) */ 707 diag = ba + i*bs2; 708 u = ba + ili*bs2; 709 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 710 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 711 712 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 713 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 714 715 /* update -U(i,k) */ 716 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 717 718 /* add multiple of row i to k-th row ... */ 719 jmin = ili + 1; jmax = bi[i+1]; 720 if (jmin < jmax){ 721 for (j=jmin; j<jmax; j++) { 722 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 723 rtmp_ptr = rtmp + bj[j]*bs2; 724 u = ba + j*bs2; 725 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 726 } 727 728 /* ... add i to row list for next nonzero entry */ 729 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 730 j = bj[jmin]; 731 jl[i] = jl[j]; jl[j] = i; /* update jl */ 732 } 733 i = nexti; 734 } 735 736 /* save nonzero entries in k-th row of U ... */ 737 738 /* invert diagonal block */ 739 diag = ba+k*bs2; 740 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 741 ierr = Kernel_A_gets_inverse_A(bs,diag,pivots,work);CHKERRQ(ierr); 742 743 jmin = bi[k]; jmax = bi[k+1]; 744 if (jmin < jmax) { 745 for (j=jmin; j<jmax; j++){ 746 vj = bj[j]; /* block col. index of U */ 747 u = ba + j*bs2; 748 rtmp_ptr = rtmp + vj*bs2; 749 for (k1=0; k1<bs2; k1++){ 750 *u++ = *rtmp_ptr; 751 *rtmp_ptr++ = 0.0; 752 } 753 } 754 755 /* ... add k to row list for first nonzero entry in k-th row */ 756 il[k] = jmin; 757 i = bj[jmin]; 758 jl[k] = jl[i]; jl[i] = k; 759 } 760 } 761 762 ierr = PetscFree(rtmp);CHKERRQ(ierr); 763 ierr = PetscFree(il);CHKERRQ(ierr); 764 ierr = PetscFree(dk);CHKERRQ(ierr); 765 ierr = PetscFree(pivots);CHKERRQ(ierr); 766 767 C->factor = FACTOR_CHOLESKY; 768 C->assembled = PETSC_TRUE; 769 C->preallocated = PETSC_TRUE; 770 ierr = PetscLogFlops(1.3333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 771 PetscFunctionReturn(0); 772 } 773 774 /* 775 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 776 Version for blocks 2 by 2. 777 */ 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 780 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,MatFactorInfo *info,Mat *B) 781 { 782 Mat C = *B; 783 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 784 IS perm = b->row; 785 PetscErrorCode ierr; 786 PetscInt *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 787 PetscInt *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 788 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 789 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 790 791 PetscFunctionBegin; 792 /* initialization */ 793 /* il and jl record the first nonzero element in each row of the accessing 794 window U(0:k, k:mbs-1). 795 jl: list of rows to be added to uneliminated rows 796 i>= k: jl(i) is the first row to be added to row i 797 i< k: jl(i) is the row following row i in some list of rows 798 jl(i) = mbs indicates the end of a list 799 il(i): points to the first nonzero element in columns k,...,mbs-1 of 800 row i of U */ 801 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 802 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 803 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 804 jl = il + mbs; 805 for (i=0; i<mbs; i++) { 806 jl[i] = mbs; il[0] = 0; 807 } 808 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 809 uik = dk + 4; 810 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 811 812 /* check permutation */ 813 if (!a->permute){ 814 ai = a->i; aj = a->j; aa = a->a; 815 } else { 816 ai = a->inew; aj = a->jnew; 817 ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 818 ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 819 ierr = PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);CHKERRQ(ierr); 820 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));CHKERRQ(ierr); 821 822 for (i=0; i<mbs; i++){ 823 jmin = ai[i]; jmax = ai[i+1]; 824 for (j=jmin; j<jmax; j++){ 825 while (a2anew[j] != j){ 826 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 827 for (k1=0; k1<4; k1++){ 828 dk[k1] = aa[k*4+k1]; 829 aa[k*4+k1] = aa[j*4+k1]; 830 aa[j*4+k1] = dk[k1]; 831 } 832 } 833 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 834 if (i > aj[j]){ 835 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 836 ap = aa + j*4; /* ptr to the beginning of the block */ 837 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 838 ap[1] = ap[2]; 839 ap[2] = dk[1]; 840 } 841 } 842 } 843 ierr = PetscFree(a2anew);CHKERRQ(ierr); 844 } 845 846 /* for each row k */ 847 for (k = 0; k<mbs; k++){ 848 849 /*initialize k-th row with elements nonzero in row perm(k) of A */ 850 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 851 ap = aa + jmin*4; 852 for (j = jmin; j < jmax; j++){ 853 vj = perm_ptr[aj[j]]; /* block col. index */ 854 rtmp_ptr = rtmp + vj*4; 855 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 856 } 857 858 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 859 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 860 i = jl[k]; /* first row to be added to k_th row */ 861 862 while (i < k){ 863 nexti = jl[i]; /* next row to be added to k_th row */ 864 865 /* compute multiplier */ 866 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 867 868 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 869 diag = ba + i*4; 870 u = ba + ili*4; 871 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 872 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 873 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 874 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 875 876 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 877 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 878 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 879 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 880 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 881 882 /* update -U(i,k): ba[ili] = uik */ 883 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 884 885 /* add multiple of row i to k-th row ... */ 886 jmin = ili + 1; jmax = bi[i+1]; 887 if (jmin < jmax){ 888 for (j=jmin; j<jmax; j++) { 889 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 890 rtmp_ptr = rtmp + bj[j]*4; 891 u = ba + j*4; 892 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 893 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 894 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 895 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 896 } 897 898 /* ... add i to row list for next nonzero entry */ 899 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 900 j = bj[jmin]; 901 jl[i] = jl[j]; jl[j] = i; /* update jl */ 902 } 903 i = nexti; 904 } 905 906 /* save nonzero entries in k-th row of U ... */ 907 908 /* invert diagonal block */ 909 diag = ba+k*4; 910 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 911 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 912 913 jmin = bi[k]; jmax = bi[k+1]; 914 if (jmin < jmax) { 915 for (j=jmin; j<jmax; j++){ 916 vj = bj[j]; /* block col. index of U */ 917 u = ba + j*4; 918 rtmp_ptr = rtmp + vj*4; 919 for (k1=0; k1<4; k1++){ 920 *u++ = *rtmp_ptr; 921 *rtmp_ptr++ = 0.0; 922 } 923 } 924 925 /* ... add k to row list for first nonzero entry in k-th row */ 926 il[k] = jmin; 927 i = bj[jmin]; 928 jl[k] = jl[i]; jl[i] = k; 929 } 930 } 931 932 ierr = PetscFree(rtmp);CHKERRQ(ierr); 933 ierr = PetscFree(il);CHKERRQ(ierr); 934 ierr = PetscFree(dk);CHKERRQ(ierr); 935 if (a->permute) { 936 ierr = PetscFree(aa);CHKERRQ(ierr); 937 } 938 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 939 C->factor = FACTOR_CHOLESKY; 940 C->assembled = PETSC_TRUE; 941 C->preallocated = PETSC_TRUE; 942 ierr = PetscLogFlops(1.3333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 943 PetscFunctionReturn(0); 944 } 945 946 /* 947 Version for when blocks are 2 by 2 Using natural ordering 948 */ 949 #undef __FUNCT__ 950 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 951 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,MatFactorInfo *info,Mat *B) 952 { 953 Mat C = *B; 954 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 955 PetscErrorCode ierr; 956 PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 957 PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 958 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 959 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 960 961 PetscFunctionBegin; 962 /* initialization */ 963 /* il and jl record the first nonzero element in each row of the accessing 964 window U(0:k, k:mbs-1). 965 jl: list of rows to be added to uneliminated rows 966 i>= k: jl(i) is the first row to be added to row i 967 i< k: jl(i) is the row following row i in some list of rows 968 jl(i) = mbs indicates the end of a list 969 il(i): points to the first nonzero element in columns k,...,mbs-1 of 970 row i of U */ 971 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 972 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 973 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 974 jl = il + mbs; 975 for (i=0; i<mbs; i++) { 976 jl[i] = mbs; il[0] = 0; 977 } 978 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 979 uik = dk + 4; 980 981 ai = a->i; aj = a->j; aa = a->a; 982 983 /* for each row k */ 984 for (k = 0; k<mbs; k++){ 985 986 /*initialize k-th row with elements nonzero in row k of A */ 987 jmin = ai[k]; jmax = ai[k+1]; 988 ap = aa + jmin*4; 989 for (j = jmin; j < jmax; j++){ 990 vj = aj[j]; /* block col. index */ 991 rtmp_ptr = rtmp + vj*4; 992 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 993 } 994 995 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 996 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 997 i = jl[k]; /* first row to be added to k_th row */ 998 999 while (i < k){ 1000 nexti = jl[i]; /* next row to be added to k_th row */ 1001 1002 /* compute multiplier */ 1003 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1004 1005 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 1006 diag = ba + i*4; 1007 u = ba + ili*4; 1008 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 1009 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 1010 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 1011 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 1012 1013 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 1014 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 1015 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 1016 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 1017 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 1018 1019 /* update -U(i,k): ba[ili] = uik */ 1020 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 1021 1022 /* add multiple of row i to k-th row ... */ 1023 jmin = ili + 1; jmax = bi[i+1]; 1024 if (jmin < jmax){ 1025 for (j=jmin; j<jmax; j++) { 1026 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 1027 rtmp_ptr = rtmp + bj[j]*4; 1028 u = ba + j*4; 1029 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 1030 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 1031 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 1032 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 1033 } 1034 1035 /* ... add i to row list for next nonzero entry */ 1036 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1037 j = bj[jmin]; 1038 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1039 } 1040 i = nexti; 1041 } 1042 1043 /* save nonzero entries in k-th row of U ... */ 1044 1045 /* invert diagonal block */ 1046 diag = ba+k*4; 1047 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 1048 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 1049 1050 jmin = bi[k]; jmax = bi[k+1]; 1051 if (jmin < jmax) { 1052 for (j=jmin; j<jmax; j++){ 1053 vj = bj[j]; /* block col. index of U */ 1054 u = ba + j*4; 1055 rtmp_ptr = rtmp + vj*4; 1056 for (k1=0; k1<4; k1++){ 1057 *u++ = *rtmp_ptr; 1058 *rtmp_ptr++ = 0.0; 1059 } 1060 } 1061 1062 /* ... add k to row list for first nonzero entry in k-th row */ 1063 il[k] = jmin; 1064 i = bj[jmin]; 1065 jl[k] = jl[i]; jl[i] = k; 1066 } 1067 } 1068 1069 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1070 ierr = PetscFree(il);CHKERRQ(ierr); 1071 ierr = PetscFree(dk);CHKERRQ(ierr); 1072 1073 C->factor = FACTOR_CHOLESKY; 1074 C->assembled = PETSC_TRUE; 1075 C->preallocated = PETSC_TRUE; 1076 ierr = PetscLogFlops(1.3333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 1077 PetscFunctionReturn(0); 1078 } 1079 1080 /* 1081 Numeric U^T*D*U factorization for SBAIJ format. 1082 Version for blocks are 1 by 1. 1083 */ 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1" 1086 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,MatFactorInfo *info,Mat *B) 1087 { 1088 Mat C = *B; 1089 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data; 1090 IS ip=b->row; 1091 PetscErrorCode ierr; 1092 PetscInt *rip,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j,*bcol; 1093 PetscInt *ai,*aj,*a2anew; 1094 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1095 MatScalar *rtmp,*ba=b->a,*bval,*aa,dk,uikdi; 1096 PetscReal zeropivot,rs,shiftnz; 1097 PetscTruth shiftpd; 1098 ChShift_Ctx sctx; 1099 PetscInt newshift; 1100 1101 PetscFunctionBegin; 1102 /* initialization */ 1103 shiftnz = info->shiftnz; 1104 shiftpd = info->shiftpd; 1105 zeropivot = info->zeropivot; 1106 1107 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1108 if (!a->permute){ 1109 ai = a->i; aj = a->j; aa = a->a; 1110 } else { 1111 ai = a->inew; aj = a->jnew; 1112 nz = ai[mbs]; 1113 ierr = PetscMalloc(nz*sizeof(MatScalar),&aa);CHKERRQ(ierr); 1114 a2anew = a->a2anew; 1115 bval = a->a; 1116 for (j=0; j<nz; j++){ 1117 aa[a2anew[j]] = *(bval++); 1118 } 1119 } 1120 1121 /* initialization */ 1122 /* il and jl record the first nonzero element in each row of the accessing 1123 window U(0:k, k:mbs-1). 1124 jl: list of rows to be added to uneliminated rows 1125 i>= k: jl(i) is the first row to be added to row i 1126 i< k: jl(i) is the row following row i in some list of rows 1127 jl(i) = mbs indicates the end of a list 1128 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1129 row i of U */ 1130 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1131 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1132 jl = il + mbs; 1133 rtmp = (MatScalar*)(jl + mbs); 1134 1135 sctx.shift_amount = 0; 1136 sctx.nshift = 0; 1137 do { 1138 sctx.chshift = PETSC_FALSE; 1139 for (i=0; i<mbs; i++) { 1140 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1141 } 1142 1143 for (k = 0; k<mbs; k++){ 1144 /*initialize k-th row by the perm[k]-th row of A */ 1145 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1146 bval = ba + bi[k]; 1147 for (j = jmin; j < jmax; j++){ 1148 col = rip[aj[j]]; 1149 rtmp[col] = aa[j]; 1150 *bval++ = 0.0; /* for in-place factorization */ 1151 } 1152 1153 /* shift the diagonal of the matrix */ 1154 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1155 1156 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1157 dk = rtmp[k]; 1158 i = jl[k]; /* first row to be added to k_th row */ 1159 1160 while (i < k){ 1161 nexti = jl[i]; /* next row to be added to k_th row */ 1162 1163 /* compute multiplier, update diag(k) and U(i,k) */ 1164 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1165 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1166 dk += uikdi*ba[ili]; 1167 ba[ili] = uikdi; /* -U(i,k) */ 1168 1169 /* add multiple of row i to k-th row */ 1170 jmin = ili + 1; jmax = bi[i+1]; 1171 if (jmin < jmax){ 1172 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1173 /* update il and jl for row i */ 1174 il[i] = jmin; 1175 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1176 } 1177 i = nexti; 1178 } 1179 1180 /* shift the diagonals when zero pivot is detected */ 1181 /* compute rs=sum of abs(off-diagonal) */ 1182 rs = 0.0; 1183 jmin = bi[k]+1; 1184 nz = bi[k+1] - jmin; 1185 if (nz){ 1186 bcol = bj + jmin; 1187 while (nz--){ 1188 rs += PetscAbsScalar(rtmp[*bcol]); 1189 bcol++; 1190 } 1191 } 1192 1193 sctx.rs = rs; 1194 sctx.pv = dk; 1195 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1196 if (newshift == 1){ 1197 break; /* sctx.shift_amount is updated */ 1198 } else if (newshift == -1){ 1199 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1200 } 1201 1202 /* copy data into U(k,:) */ 1203 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1204 jmin = bi[k]+1; jmax = bi[k+1]; 1205 if (jmin < jmax) { 1206 for (j=jmin; j<jmax; j++){ 1207 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1208 } 1209 /* add the k-th row into il and jl */ 1210 il[k] = jmin; 1211 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1212 } 1213 } 1214 } while (sctx.chshift); 1215 ierr = PetscFree(il);CHKERRQ(ierr); 1216 if (a->permute){ierr = PetscFree(aa);CHKERRQ(ierr);} 1217 1218 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1219 C->factor = FACTOR_CHOLESKY; 1220 C->assembled = PETSC_TRUE; 1221 C->preallocated = PETSC_TRUE; 1222 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1223 if (sctx.nshift){ 1224 if (shiftnz) { 1225 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqSBAIJ_1: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1226 } else if (shiftpd) { 1227 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqSBAIJ_1: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1228 } 1229 } 1230 PetscFunctionReturn(0); 1231 } 1232 1233 /* 1234 Version for when blocks are 1 by 1 Using natural ordering 1235 */ 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 1238 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,MatFactorInfo *info,Mat *B) 1239 { 1240 Mat C = *B; 1241 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data; 1242 PetscErrorCode ierr; 1243 PetscInt i,j,mbs = a->mbs; 1244 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 1245 PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz; 1246 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 1247 PetscReal zeropivot,rs,shiftnz; 1248 PetscTruth shiftpd; 1249 ChShift_Ctx sctx; 1250 PetscInt newshift; 1251 1252 PetscFunctionBegin; 1253 /* initialization */ 1254 shiftnz = info->shiftnz; 1255 shiftpd = info->shiftpd; 1256 zeropivot = info->zeropivot; 1257 1258 /* il and jl record the first nonzero element in each row of the accessing 1259 window U(0:k, k:mbs-1). 1260 jl: list of rows to be added to uneliminated rows 1261 i>= k: jl(i) is the first row to be added to row i 1262 i< k: jl(i) is the row following row i in some list of rows 1263 jl(i) = mbs indicates the end of a list 1264 il(i): points to the first nonzero element in U(i,k:mbs-1) 1265 */ 1266 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1267 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&il);CHKERRQ(ierr); 1268 jl = il + mbs; 1269 1270 sctx.shift_amount = 0; 1271 sctx.nshift = 0; 1272 do { 1273 sctx.chshift = PETSC_FALSE; 1274 for (i=0; i<mbs; i++) { 1275 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1276 } 1277 1278 for (k = 0; k<mbs; k++){ 1279 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1280 nz = ai[k+1] - ai[k]; 1281 acol = aj + ai[k]; 1282 aval = aa + ai[k]; 1283 bval = ba + bi[k]; 1284 while (nz -- ){ 1285 rtmp[*acol++] = *aval++; 1286 *bval++ = 0.0; /* for in-place factorization */ 1287 } 1288 1289 /* shift the diagonal of the matrix */ 1290 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1291 1292 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1293 dk = rtmp[k]; 1294 i = jl[k]; /* first row to be added to k_th row */ 1295 1296 while (i < k){ 1297 nexti = jl[i]; /* next row to be added to k_th row */ 1298 /* compute multiplier, update D(k) and U(i,k) */ 1299 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1300 uikdi = - ba[ili]*ba[bi[i]]; 1301 dk += uikdi*ba[ili]; 1302 ba[ili] = uikdi; /* -U(i,k) */ 1303 1304 /* add multiple of row i to k-th row ... */ 1305 jmin = ili + 1; 1306 nz = bi[i+1] - jmin; 1307 if (nz > 0){ 1308 bcol = bj + jmin; 1309 bval = ba + jmin; 1310 while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 1311 /* update il and jl for i-th row */ 1312 il[i] = jmin; 1313 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1314 } 1315 i = nexti; 1316 } 1317 1318 /* shift the diagonals when zero pivot is detected */ 1319 /* compute rs=sum of abs(off-diagonal) */ 1320 rs = 0.0; 1321 jmin = bi[k]+1; 1322 nz = bi[k+1] - jmin; 1323 if (nz){ 1324 bcol = bj + jmin; 1325 while (nz--){ 1326 rs += PetscAbsScalar(rtmp[*bcol]); 1327 bcol++; 1328 } 1329 } 1330 1331 sctx.rs = rs; 1332 sctx.pv = dk; 1333 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1334 if (newshift == 1){ 1335 break; /* sctx.shift_amount is updated */ 1336 } else if (newshift == -1){ 1337 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1338 } 1339 1340 /* copy data into U(k,:) */ 1341 ba[bi[k]] = 1.0/dk; 1342 jmin = bi[k]+1; 1343 nz = bi[k+1] - jmin; 1344 if (nz){ 1345 bcol = bj + jmin; 1346 bval = ba + jmin; 1347 while (nz--){ 1348 *bval++ = rtmp[*bcol]; 1349 rtmp[*bcol++] = 0.0; 1350 } 1351 /* add k-th row into il and jl */ 1352 il[k] = jmin; 1353 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1354 } 1355 } /* end of for (k = 0; k<mbs; k++) */ 1356 } while (sctx.chshift); 1357 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1358 ierr = PetscFree(il);CHKERRQ(ierr); 1359 1360 C->factor = FACTOR_CHOLESKY; 1361 C->assembled = PETSC_TRUE; 1362 C->preallocated = PETSC_TRUE; 1363 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1364 if (sctx.nshift){ 1365 if (shiftnz) { 1366 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1367 } else if (shiftpd) { 1368 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1369 } 1370 } 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ" 1376 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info) 1377 { 1378 PetscErrorCode ierr; 1379 Mat C; 1380 1381 PetscFunctionBegin; 1382 ierr = MatCholeskyFactorSymbolic(A,perm,info,&C);CHKERRQ(ierr); 1383 ierr = MatCholeskyFactorNumeric(A,info,&C);CHKERRQ(ierr); 1384 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1385 PetscFunctionReturn(0); 1386 } 1387 1388 1389