1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 9 { 10 PetscFunctionBegin; 11 12 SETERRQ(PETSC_ERR_SUP,"Code not written"); 13 #if !defined(PETSC_USE_DEBUG) 14 PetscFunctionReturn(0); 15 #endif 16 } 17 18 19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 21 22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 28 /* ------------------------------------------------------------ 29 30 This interface was contribed by Tony Caola 31 32 This routine is an interface to the pivoting drop-tolerance 33 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 34 SPARSEKIT2. 35 36 The SPARSEKIT2 routines used here are covered by the GNU 37 copyright; see the file gnu in this directory. 38 39 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 40 help in getting this routine ironed out. 41 42 The major drawback to this routine is that if info->fill is 43 not large enough it fails rather than allocating more space; 44 this can be fixed by hacking/improving the f2c version of 45 Yousef Saad's code. 46 47 ------------------------------------------------------------ 48 */ 49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 50 { 51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 52 PetscFunctionBegin; 53 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 54 You can obtain the drop tolerance routines by installing PETSc from\n\ 55 www.mcs.anl.gov/petsc\n"); 56 #else 57 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 58 IS iscolf,isicol,isirow; 59 PetscTruth reorder; 60 PetscErrorCode ierr,sierr; 61 PetscInt *c,*r,*ic,i,n = A->m; 62 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63 PetscInt *ordcol,*iwk,*iperm,*jw; 64 PetscInt jmax,lfill,job,*o_i,*o_j; 65 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66 PetscReal af; 67 68 PetscFunctionBegin; 69 70 if (info->dt == PETSC_DEFAULT) info->dt = .005; 71 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 72 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 73 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 74 lfill = (PetscInt)(info->dtcount/2.0); 75 jmax = (PetscInt)(info->fill*a->nz); 76 77 78 /* ------------------------------------------------------------ 79 If reorder=.TRUE., then the original matrix has to be 80 reordered to reflect the user selected ordering scheme, and 81 then de-reordered so it is in it's original format. 82 Because Saad's dperm() is NOT in place, we have to copy 83 the original matrix and allocate more storage. . . 84 ------------------------------------------------------------ 85 */ 86 87 /* set reorder to true if either isrow or iscol is not identity */ 88 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 89 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 90 reorder = PetscNot(reorder); 91 92 93 /* storage for ilu factor */ 94 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 95 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 97 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 for (i=old_i[0];i<old_i[n];i++) { 104 old_j[i]++; 105 } 106 for(i=0;i<n+1;i++) { 107 old_i[i]++; 108 }; 109 110 111 if (reorder) { 112 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 113 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 114 for(i=0;i<n;i++) { 115 r[i] = r[i]+1; 116 c[i] = c[i]+1; 117 } 118 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 119 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 121 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 122 for (i=0;i<n;i++) { 123 r[i] = r[i]-1; 124 c[i] = c[i]-1; 125 } 126 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 127 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 128 o_a = old_a2; 129 o_j = old_j2; 130 o_i = old_i2; 131 } else { 132 o_a = old_a; 133 o_j = old_j; 134 o_i = old_i; 135 } 136 137 /* ------------------------------------------------------------ 138 Call Saad's ilutp() routine to generate the factorization 139 ------------------------------------------------------------ 140 */ 141 142 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 144 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 145 146 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 147 if (sierr) { 148 switch (sierr) { 149 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 150 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 152 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 153 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 154 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 155 } 156 } 157 158 ierr = PetscFree(w);CHKERRQ(ierr); 159 ierr = PetscFree(jw);CHKERRQ(ierr); 160 161 /* ------------------------------------------------------------ 162 Saad's routine gives the result in Modified Sparse Row (msr) 163 Convert to Compressed Sparse Row format (csr) 164 ------------------------------------------------------------ 165 */ 166 167 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 168 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 169 170 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 171 172 ierr = PetscFree(iwk);CHKERRQ(ierr); 173 ierr = PetscFree(wk);CHKERRQ(ierr); 174 175 if (reorder) { 176 ierr = PetscFree(old_a2);CHKERRQ(ierr); 177 ierr = PetscFree(old_j2);CHKERRQ(ierr); 178 ierr = PetscFree(old_i2);CHKERRQ(ierr); 179 } else { 180 /* fix permutation of old_j that the factorization introduced */ 181 for (i=old_i[0]; i<old_i[n]; i++) { 182 old_j[i-1] = iperm[old_j[i-1]-1]; 183 } 184 } 185 186 /* get rid of the shift to indices starting at 1 */ 187 for (i=0; i<n+1; i++) { 188 old_i[i]--; 189 } 190 for (i=old_i[0];i<old_i[n];i++) { 191 old_j[i]--; 192 } 193 194 /* Make the factored matrix 0-based */ 195 for (i=0; i<n+1; i++) { 196 new_i[i]--; 197 } 198 for (i=new_i[0];i<new_i[n];i++) { 199 new_j[i]--; 200 } 201 202 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 203 /*-- permute the right-hand-side and solution vectors --*/ 204 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 205 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 206 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 207 for(i=0; i<n; i++) { 208 ordcol[i] = ic[iperm[i]-1]; 209 }; 210 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 211 ierr = ISDestroy(isicol);CHKERRQ(ierr); 212 213 ierr = PetscFree(iperm);CHKERRQ(ierr); 214 215 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 216 ierr = PetscFree(ordcol);CHKERRQ(ierr); 217 218 /*----- put together the new matrix -----*/ 219 220 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 221 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 222 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 223 (*fact)->factor = FACTOR_LU; 224 (*fact)->assembled = PETSC_TRUE; 225 226 b = (Mat_SeqAIJ*)(*fact)->data; 227 ierr = PetscFree(b->imax);CHKERRQ(ierr); 228 b->sorted = PETSC_FALSE; 229 b->singlemalloc = PETSC_FALSE; 230 /* the next line frees the default space generated by the MatCreate() */ 231 ierr = PetscFree(b->a);CHKERRQ(ierr); 232 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 247 248 /* check out for identical nodes. If found, use inode functions */ 249 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 250 251 af = ((double)b->nz)/((double)a->nz) + .001; 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 /* 262 Factorization code for AIJ format. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 267 { 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 269 IS isicol; 270 PetscErrorCode ierr; 271 PetscInt *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j; 272 PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 273 PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im; 274 PetscReal f; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 284 ainew[0] = 0; 285 /* don't know how many column pointers are needed so estimate */ 286 f = info->fill; 287 jmax = (PetscInt)(f*ai[n]+1); 288 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 289 /* fill is a linked list of nonzeros in active row */ 290 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 291 im = fill + n + 1; 292 /* idnew is location of diagonal in factor */ 293 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr); 294 idnew[0] = 0; 295 296 for (i=0; i<n; i++) { 297 /* first copy previous fill into linked list */ 298 nnz = nz = ai[r[i]+1] - ai[r[i]]; 299 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 300 ajtmp = aj + ai[r[i]]; 301 fill[n] = n; 302 while (nz--) { 303 fm = n; 304 idx = ic[*ajtmp++]; 305 do { 306 m = fm; 307 fm = fill[m]; 308 } while (fm < idx); 309 fill[m] = idx; 310 fill[idx] = fm; 311 } 312 row = fill[n]; 313 while (row < i) { 314 ajtmp = ajnew + idnew[row] + 1; 315 nzbd = 1 + idnew[row] - ainew[row]; 316 nz = im[row] - nzbd; 317 fm = row; 318 while (nz-- > 0) { 319 idx = *ajtmp++ ; 320 nzbd++; 321 if (idx == i) im[row] = nzbd; 322 do { 323 m = fm; 324 fm = fill[m]; 325 } while (fm < idx); 326 if (fm != idx) { 327 fill[m] = idx; 328 fill[idx] = fm; 329 fm = idx; 330 nnz++; 331 } 332 } 333 row = fill[row]; 334 } 335 /* copy new filled row into permanent storage */ 336 ainew[i+1] = ainew[i] + nnz; 337 if (ainew[i+1] > jmax) { 338 339 /* estimate how much additional space we will need */ 340 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 341 /* just double the memory each time */ 342 PetscInt maxadd = jmax; 343 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 344 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 345 jmax += maxadd; 346 347 /* allocate a longer ajnew */ 348 ierr = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 349 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr); 350 ierr = PetscFree(ajnew);CHKERRQ(ierr); 351 ajnew = ajtmp; 352 reallocs++; /* count how many times we realloc */ 353 } 354 ajtmp = ajnew + ainew[i]; 355 fm = fill[n]; 356 nzi = 0; 357 im[i] = nnz; 358 while (nnz--) { 359 if (fm < i) nzi++; 360 *ajtmp++ = fm ; 361 fm = fill[fm]; 362 } 363 idnew[i] = ainew[i] + nzi; 364 } 365 if (ai[n] != 0) { 366 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 370 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 371 } else { 372 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 373 } 374 375 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 376 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 377 378 ierr = PetscFree(fill);CHKERRQ(ierr); 379 380 /* put together the new matrix */ 381 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 382 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 383 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 384 PetscLogObjectParent(*B,isicol); 385 b = (Mat_SeqAIJ*)(*B)->data; 386 ierr = PetscFree(b->imax);CHKERRQ(ierr); 387 b->singlemalloc = PETSC_FALSE; 388 /* the next line frees the default space generated by the Create() */ 389 ierr = PetscFree(b->a);CHKERRQ(ierr); 390 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 391 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 392 b->j = ajnew; 393 b->i = ainew; 394 b->diag = idnew; 395 b->ilen = 0; 396 b->imax = 0; 397 b->row = isrow; 398 b->col = iscol; 399 /* 400 b->lu_damping = info->damping; 401 b->lu_zeropivot = info->zeropivot; 402 b->lu_shift = info->shift; 403 b->lu_shift_fraction = info->shift_fraction; 404 */ 405 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 406 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 407 b->icol = isicol; 408 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 409 /* In b structure: Free imax, ilen, old a, old j. 410 Allocate idnew, solve_work, new a, new j */ 411 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar))); 412 b->maxnz = b->nz = ainew[n] ; 413 414 (*B)->factor = FACTOR_LU; 415 (*B)->info.factor_mallocs = reallocs; 416 (*B)->info.fill_ratio_given = f; 417 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 418 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 419 420 if (ai[n] != 0) { 421 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 422 } else { 423 (*B)->info.fill_ratio_needed = 0.0; 424 } 425 PetscFunctionReturn(0); 426 } 427 /* ----------------------------------------------------------- */ 428 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 429 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 432 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 433 { 434 Mat C=*B; 435 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 436 IS isrow = b->row,isicol = b->icol; 437 PetscErrorCode ierr; 438 PetscInt *r,*ic,i,j,n=A->m,*ai=b->i,*aj=b->j; 439 PetscInt *ajtmpold,*ajtmp,nz,row,*ics; 440 PetscInt *diag_offset = b->diag,diag,*pj,nshift=0; 441 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 442 PetscReal zeropivot=b->lu_zeropivot,rs,d; 443 PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0.,shift_hi=1.,shift_top=0.; 444 PetscTruth lushift; 445 PetscReal shift_nonzero; /* shift_pd=b->lu_shift, =b->lu_damping; */ 446 PetscTruth shift_pd; 447 448 PetscFunctionBegin; 449 450 shift_nonzero = info->shiftnz; 451 shift_pd = info->shiftpd; 452 zeropivot = info->zeropivot; 453 shift_fraction = info->shift_fraction; 454 455 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 456 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 457 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 458 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 459 rtmps = rtmp; ics = ic; 460 461 if (!a->diag) { 462 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 463 } 464 /* if both shift schemes are chosen by user, only use shift_pd */ 465 if (shift_pd && shift_nonzero) shift_nonzero = 0.0; 466 if (shift_pd) { /* set shift_top=max{row_shift} */ 467 PetscInt *aai = a->i,*ddiag = a->diag; 468 shift_top = 0; 469 for (i=0; i<n; i++) { 470 d = PetscAbsScalar((a->a)[ddiag[i]]); 471 /* calculate amt of shift needed for this row */ 472 if (d<=0) { 473 row_shift = 0; 474 } else { 475 row_shift = -2*d; 476 } 477 v = a->a+aai[i]; 478 nz = aai[i+1] - aai[i]; 479 for (j=0; j<nz; j++) 480 row_shift += PetscAbsScalar(v[j]); 481 if (row_shift>shift_top) shift_top = row_shift; 482 } 483 } 484 485 /* shift_fraction = b->lu_shift_fraction; */ 486 shift_amount = 0; 487 do { 488 lushift = PETSC_FALSE; 489 for (i=0; i<n; i++){ 490 nz = ai[i+1] - ai[i]; 491 ajtmp = aj + ai[i]; 492 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 493 494 /* load in initial (unfactored row) */ 495 nz = a->i[r[i]+1] - a->i[r[i]]; 496 ajtmpold = a->j + a->i[r[i]]; 497 v = a->a + a->i[r[i]]; 498 for (j=0; j<nz; j++) { 499 rtmp[ics[ajtmpold[j]]] = v[j]; 500 } 501 rtmp[ics[r[i]]] += shift_amount; /* shift the diagonal of the matrix */ 502 503 row = *ajtmp++; 504 while (row < i) { 505 pc = rtmp + row; 506 if (*pc != 0.0) { 507 pv = b->a + diag_offset[row]; 508 pj = b->j + diag_offset[row] + 1; 509 multiplier = *pc / *pv++; 510 *pc = multiplier; 511 nz = ai[row+1] - diag_offset[row] - 1; 512 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 513 PetscLogFlops(2*nz); 514 } 515 row = *ajtmp++; 516 } 517 /* finished row so stick it into b->a */ 518 pv = b->a + ai[i] ; 519 pj = b->j + ai[i] ; 520 nz = ai[i+1] - ai[i]; 521 diag = diag_offset[i] - ai[i]; 522 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 523 rs = 0.0; 524 for (j=0; j<nz; j++) { 525 pv[j] = rtmps[pj[j]]; 526 if (j != diag) rs += PetscAbsScalar(pv[j]); 527 } 528 /* shift the diagonals when zero pivot is detected */ 529 #define MAX_NSHIFT 5 530 if (PetscAbsScalar(pv[diag]) <= zeropivot*rs && shift_nonzero){ 531 /* force |diag(*B)| > zeropivot*rs */ 532 if (!nshift){ 533 shift_amount = shift_nonzero; 534 } else { 535 shift_amount *= 2.0; 536 } 537 lushift = PETSC_TRUE; 538 nshift++; 539 break; 540 } else if (PetscRealPart(pv[diag]) <= zeropivot*rs && shift_pd){ 541 /* force *B to be diagonally dominant */ 542 if (nshift>MAX_NSHIFT) { 543 SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner"); 544 } else if (nshift==MAX_NSHIFT) { 545 shift_fraction = shift_hi; 546 lushift = PETSC_FALSE; 547 } else { 548 shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 549 lushift = PETSC_TRUE; 550 } 551 shift_amount = shift_fraction * shift_top; 552 nshift++; 553 break; 554 } else if (PetscAbsScalar(pv[diag]) <= zeropivot*rs){ 555 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 556 } 557 } 558 if (shift_pd && !lushift && shift_fraction>0 && nshift<MAX_NSHIFT) { 559 /* 560 * if no shift in this attempt & shifting & started shifting & can refine, 561 * then try lower shift 562 */ 563 shift_hi = shift_fraction; 564 shift_fraction = (shift_hi+shift_lo)/2.; 565 shift_amount = shift_fraction * shift_top; 566 lushift = PETSC_TRUE; 567 nshift++; 568 } 569 } while (lushift); 570 571 /* invert diagonal entries for simplier triangular solves */ 572 for (i=0; i<n; i++) { 573 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 574 } 575 576 ierr = PetscFree(rtmp);CHKERRQ(ierr); 577 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 578 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 579 C->factor = FACTOR_LU; 580 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 581 C->assembled = PETSC_TRUE; 582 PetscLogFlops(C->n); 583 if (nshift){ 584 if (shift_nonzero) { 585 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nonzero tries %D, shift_ amount %g\n",nshift,shift_amount); 586 } else if (shift_pd) { 587 b->lu_shift_fraction = shift_fraction; 588 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift); 589 } 590 } 591 PetscFunctionReturn(0); 592 } 593 594 #undef __FUNCT__ 595 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 596 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 597 { 598 PetscFunctionBegin; 599 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 600 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 601 PetscFunctionReturn(0); 602 } 603 604 605 /* ----------------------------------------------------------- */ 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatLUFactor_SeqAIJ" 608 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 609 { 610 PetscErrorCode ierr; 611 Mat C; 612 613 PetscFunctionBegin; 614 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 615 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 616 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 617 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 618 PetscFunctionReturn(0); 619 } 620 /* ----------------------------------------------------------- */ 621 #undef __FUNCT__ 622 #define __FUNCT__ "MatSolve_SeqAIJ" 623 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 624 { 625 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 626 IS iscol = a->col,isrow = a->row; 627 PetscErrorCode ierr; 628 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 629 PetscInt nz,*rout,*cout; 630 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 631 632 PetscFunctionBegin; 633 if (!n) PetscFunctionReturn(0); 634 635 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 636 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 637 tmp = a->solve_work; 638 639 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 640 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 641 642 /* forward solve the lower triangular */ 643 tmp[0] = b[*r++]; 644 tmps = tmp; 645 for (i=1; i<n; i++) { 646 v = aa + ai[i] ; 647 vi = aj + ai[i] ; 648 nz = a->diag[i] - ai[i]; 649 sum = b[*r++]; 650 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 651 tmp[i] = sum; 652 } 653 654 /* backward solve the upper triangular */ 655 for (i=n-1; i>=0; i--){ 656 v = aa + a->diag[i] + 1; 657 vi = aj + a->diag[i] + 1; 658 nz = ai[i+1] - a->diag[i] - 1; 659 sum = tmp[i]; 660 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 661 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 662 } 663 664 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 665 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 666 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 667 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 668 PetscLogFlops(2*a->nz - A->n); 669 PetscFunctionReturn(0); 670 } 671 672 /* ----------------------------------------------------------- */ 673 #undef __FUNCT__ 674 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 675 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 676 { 677 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 678 PetscErrorCode ierr; 679 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 680 PetscScalar *x,*b,*aa = a->a; 681 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 682 PetscInt adiag_i,i,*vi,nz,ai_i; 683 PetscScalar *v,sum; 684 #endif 685 686 PetscFunctionBegin; 687 if (!n) PetscFunctionReturn(0); 688 689 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 690 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 691 692 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 693 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 694 #else 695 /* forward solve the lower triangular */ 696 x[0] = b[0]; 697 for (i=1; i<n; i++) { 698 ai_i = ai[i]; 699 v = aa + ai_i; 700 vi = aj + ai_i; 701 nz = adiag[i] - ai_i; 702 sum = b[i]; 703 while (nz--) sum -= *v++ * x[*vi++]; 704 x[i] = sum; 705 } 706 707 /* backward solve the upper triangular */ 708 for (i=n-1; i>=0; i--){ 709 adiag_i = adiag[i]; 710 v = aa + adiag_i + 1; 711 vi = aj + adiag_i + 1; 712 nz = ai[i+1] - adiag_i - 1; 713 sum = x[i]; 714 while (nz--) sum -= *v++ * x[*vi++]; 715 x[i] = sum*aa[adiag_i]; 716 } 717 #endif 718 PetscLogFlops(2*a->nz - A->n); 719 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 720 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 721 PetscFunctionReturn(0); 722 } 723 724 #undef __FUNCT__ 725 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 726 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 727 { 728 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 729 IS iscol = a->col,isrow = a->row; 730 PetscErrorCode ierr; 731 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 732 PetscInt nz,*rout,*cout; 733 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 734 735 PetscFunctionBegin; 736 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 737 738 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 739 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 740 tmp = a->solve_work; 741 742 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 743 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 744 745 /* forward solve the lower triangular */ 746 tmp[0] = b[*r++]; 747 for (i=1; i<n; i++) { 748 v = aa + ai[i] ; 749 vi = aj + ai[i] ; 750 nz = a->diag[i] - ai[i]; 751 sum = b[*r++]; 752 while (nz--) sum -= *v++ * tmp[*vi++ ]; 753 tmp[i] = sum; 754 } 755 756 /* backward solve the upper triangular */ 757 for (i=n-1; i>=0; i--){ 758 v = aa + a->diag[i] + 1; 759 vi = aj + a->diag[i] + 1; 760 nz = ai[i+1] - a->diag[i] - 1; 761 sum = tmp[i]; 762 while (nz--) sum -= *v++ * tmp[*vi++ ]; 763 tmp[i] = sum*aa[a->diag[i]]; 764 x[*c--] += tmp[i]; 765 } 766 767 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 768 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 769 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 770 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 771 PetscLogFlops(2*a->nz); 772 773 PetscFunctionReturn(0); 774 } 775 /* -------------------------------------------------------------------*/ 776 #undef __FUNCT__ 777 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 778 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 779 { 780 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 781 IS iscol = a->col,isrow = a->row; 782 PetscErrorCode ierr; 783 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 784 PetscInt nz,*rout,*cout,*diag = a->diag; 785 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 786 787 PetscFunctionBegin; 788 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 789 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 790 tmp = a->solve_work; 791 792 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 793 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 794 795 /* copy the b into temp work space according to permutation */ 796 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 797 798 /* forward solve the U^T */ 799 for (i=0; i<n; i++) { 800 v = aa + diag[i] ; 801 vi = aj + diag[i] + 1; 802 nz = ai[i+1] - diag[i] - 1; 803 s1 = tmp[i]; 804 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 805 while (nz--) { 806 tmp[*vi++ ] -= (*v++)*s1; 807 } 808 tmp[i] = s1; 809 } 810 811 /* backward solve the L^T */ 812 for (i=n-1; i>=0; i--){ 813 v = aa + diag[i] - 1 ; 814 vi = aj + diag[i] - 1 ; 815 nz = diag[i] - ai[i]; 816 s1 = tmp[i]; 817 while (nz--) { 818 tmp[*vi-- ] -= (*v--)*s1; 819 } 820 } 821 822 /* copy tmp into x according to permutation */ 823 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 824 825 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 826 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 827 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 828 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 829 830 PetscLogFlops(2*a->nz-A->n); 831 PetscFunctionReturn(0); 832 } 833 834 #undef __FUNCT__ 835 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 836 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 837 { 838 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 839 IS iscol = a->col,isrow = a->row; 840 PetscErrorCode ierr; 841 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 842 PetscInt nz,*rout,*cout,*diag = a->diag; 843 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 844 845 PetscFunctionBegin; 846 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 847 848 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 849 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 850 tmp = a->solve_work; 851 852 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 853 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 854 855 /* copy the b into temp work space according to permutation */ 856 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 857 858 /* forward solve the U^T */ 859 for (i=0; i<n; i++) { 860 v = aa + diag[i] ; 861 vi = aj + diag[i] + 1; 862 nz = ai[i+1] - diag[i] - 1; 863 tmp[i] *= *v++; 864 while (nz--) { 865 tmp[*vi++ ] -= (*v++)*tmp[i]; 866 } 867 } 868 869 /* backward solve the L^T */ 870 for (i=n-1; i>=0; i--){ 871 v = aa + diag[i] - 1 ; 872 vi = aj + diag[i] - 1 ; 873 nz = diag[i] - ai[i]; 874 while (nz--) { 875 tmp[*vi-- ] -= (*v--)*tmp[i]; 876 } 877 } 878 879 /* copy tmp into x according to permutation */ 880 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 881 882 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 883 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 884 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 885 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 886 887 PetscLogFlops(2*a->nz); 888 PetscFunctionReturn(0); 889 } 890 /* ----------------------------------------------------------------*/ 891 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 895 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 896 { 897 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 898 IS isicol; 899 PetscErrorCode ierr; 900 PetscInt *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j; 901 PetscInt *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 902 PetscInt *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0; 903 PetscInt incrlev,nnz,i,levels,diagonal_fill; 904 PetscTruth col_identity,row_identity; 905 PetscReal f; 906 907 PetscFunctionBegin; 908 f = info->fill; 909 levels = (PetscInt)info->levels; 910 diagonal_fill = (PetscInt)info->diagonal_fill; 911 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 912 913 /* special case that simply copies fill pattern */ 914 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 915 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 916 if (!levels && row_identity && col_identity) { 917 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 918 (*fact)->factor = FACTOR_LU; 919 b = (Mat_SeqAIJ*)(*fact)->data; 920 if (!b->diag) { 921 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 922 } 923 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 924 b->row = isrow; 925 b->col = iscol; 926 b->icol = isicol; 927 /* 928 b->lu_damping = info->damping; 929 b->lu_zeropivot = info->zeropivot; 930 b->lu_shift = info->shift; 931 b->lu_shift_fraction= info->shift_fraction; 932 */ 933 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 934 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 935 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 936 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 937 PetscFunctionReturn(0); 938 } 939 940 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 941 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 942 943 /* get new row pointers */ 944 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 945 ainew[0] = 0; 946 /* don't know how many column pointers are needed so estimate */ 947 jmax = (PetscInt)(f*(ai[n]+1)); 948 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 949 /* ajfill is level of fill for each fill entry */ 950 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr); 951 /* fill is a linked list of nonzeros in active row */ 952 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 953 /* im is level for each filled value */ 954 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 955 /* dloc is location of diagonal in factor */ 956 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr); 957 dloc[0] = 0; 958 for (prow=0; prow<n; prow++) { 959 960 /* copy current row into linked list */ 961 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 962 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 963 xi = aj + ai[r[prow]]; 964 fill[n] = n; 965 fill[prow] = -1; /* marker to indicate if diagonal exists */ 966 while (nz--) { 967 fm = n; 968 idx = ic[*xi++ ]; 969 do { 970 m = fm; 971 fm = fill[m]; 972 } while (fm < idx); 973 fill[m] = idx; 974 fill[idx] = fm; 975 im[idx] = 0; 976 } 977 978 /* make sure diagonal entry is included */ 979 if (diagonal_fill && fill[prow] == -1) { 980 fm = n; 981 while (fill[fm] < prow) fm = fill[fm]; 982 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 983 fill[fm] = prow; 984 im[prow] = 0; 985 nzf++; 986 dcount++; 987 } 988 989 nzi = 0; 990 row = fill[n]; 991 while (row < prow) { 992 incrlev = im[row] + 1; 993 nz = dloc[row]; 994 xi = ajnew + ainew[row] + nz + 1; 995 flev = ajfill + ainew[row] + nz + 1; 996 nnz = ainew[row+1] - ainew[row] - nz - 1; 997 fm = row; 998 while (nnz-- > 0) { 999 idx = *xi++ ; 1000 if (*flev + incrlev > levels) { 1001 flev++; 1002 continue; 1003 } 1004 do { 1005 m = fm; 1006 fm = fill[m]; 1007 } while (fm < idx); 1008 if (fm != idx) { 1009 im[idx] = *flev + incrlev; 1010 fill[m] = idx; 1011 fill[idx] = fm; 1012 fm = idx; 1013 nzf++; 1014 } else { 1015 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 1016 } 1017 flev++; 1018 } 1019 row = fill[row]; 1020 nzi++; 1021 } 1022 /* copy new filled row into permanent storage */ 1023 ainew[prow+1] = ainew[prow] + nzf; 1024 if (ainew[prow+1] > jmax) { 1025 1026 /* estimate how much additional space we will need */ 1027 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1028 /* just double the memory each time */ 1029 /* maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1030 PetscInt maxadd = jmax; 1031 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1032 jmax += maxadd; 1033 1034 /* allocate a longer ajnew and ajfill */ 1035 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1036 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1037 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1038 ajnew = xi; 1039 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1040 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1041 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1042 ajfill = xi; 1043 reallocs++; /* count how many times we realloc */ 1044 } 1045 xi = ajnew + ainew[prow] ; 1046 flev = ajfill + ainew[prow] ; 1047 dloc[prow] = nzi; 1048 fm = fill[n]; 1049 while (nzf--) { 1050 *xi++ = fm ; 1051 *flev++ = im[fm]; 1052 fm = fill[fm]; 1053 } 1054 /* make sure row has diagonal entry */ 1055 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1056 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1057 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1058 } 1059 } 1060 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1061 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1062 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1063 ierr = PetscFree(fill);CHKERRQ(ierr); 1064 ierr = PetscFree(im);CHKERRQ(ierr); 1065 1066 { 1067 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1068 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1069 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1070 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1071 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1072 if (diagonal_fill) { 1073 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1074 } 1075 } 1076 1077 /* put together the new matrix */ 1078 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1079 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1080 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1081 PetscLogObjectParent(*fact,isicol); 1082 b = (Mat_SeqAIJ*)(*fact)->data; 1083 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1084 b->singlemalloc = PETSC_FALSE; 1085 len = (ainew[n] )*sizeof(PetscScalar); 1086 /* the next line frees the default space generated by the Create() */ 1087 ierr = PetscFree(b->a);CHKERRQ(ierr); 1088 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1089 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1090 b->j = ajnew; 1091 b->i = ainew; 1092 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1093 b->diag = dloc; 1094 b->ilen = 0; 1095 b->imax = 0; 1096 b->row = isrow; 1097 b->col = iscol; 1098 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1099 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1100 b->icol = isicol; 1101 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1102 /* In b structure: Free imax, ilen, old a, old j. 1103 Allocate dloc, solve_work, new a, new j */ 1104 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar))); 1105 b->maxnz = b->nz = ainew[n] ; 1106 /* 1107 b->lu_damping = info->damping; 1108 b->lu_shift = info->shift; 1109 b->lu_shift_fraction = info->shift_fraction; 1110 b->lu_zeropivot = info->zeropivot; 1111 */ 1112 (*fact)->factor = FACTOR_LU; 1113 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1114 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1115 1116 (*fact)->info.factor_mallocs = reallocs; 1117 (*fact)->info.fill_ratio_given = f; 1118 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #include "src/mat/impls/sbaij/seq/sbaij.h" 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1125 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1126 { 1127 Mat C = *B; 1128 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1129 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1130 IS ip=b->row; 1131 PetscErrorCode ierr; 1132 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1133 PetscInt *ai=a->i,*aj=a->j; 1134 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1135 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1136 PetscReal zeropivot=b->factor_zeropivot,shift_amount,rs; 1137 PetscTruth chshift; 1138 PetscInt nshift=0; 1139 PetscReal shift_pd=b->factor_shift,shift_nonzero=b->factor_damping; 1140 1141 PetscFunctionBegin; 1142 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1143 1144 /* initialization */ 1145 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1146 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1147 jl = il + mbs; 1148 rtmp = (MatScalar*)(jl + mbs); 1149 1150 shift_amount = 0; 1151 do { 1152 chshift = PETSC_FALSE; 1153 for (i=0; i<mbs; i++) { 1154 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1155 } 1156 1157 for (k = 0; k<mbs; k++){ 1158 bval = ba + bi[k]; 1159 /* initialize k-th row by the perm[k]-th row of A */ 1160 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1161 for (j = jmin; j < jmax; j++){ 1162 col = rip[aj[j]]; 1163 if (col >= k){ /* only take upper triangular entry */ 1164 rtmp[col] = aa[j]; 1165 *bval++ = 0.0; /* for in-place factorization */ 1166 } 1167 } 1168 /* shift the diagonal of the matrix */ 1169 if (nshift) rtmp[k] += shift_amount; 1170 1171 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1172 dk = rtmp[k]; 1173 i = jl[k]; /* first row to be added to k_th row */ 1174 1175 while (i < k){ 1176 nexti = jl[i]; /* next row to be added to k_th row */ 1177 1178 /* compute multiplier, update diag(k) and U(i,k) */ 1179 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1180 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1181 dk += uikdi*ba[ili]; 1182 ba[ili] = uikdi; /* -U(i,k) */ 1183 1184 /* add multiple of row i to k-th row */ 1185 jmin = ili + 1; jmax = bi[i+1]; 1186 if (jmin < jmax){ 1187 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1188 /* update il and jl for row i */ 1189 il[i] = jmin; 1190 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1191 } 1192 i = nexti; 1193 } 1194 1195 /* shift the diagonals when zero pivot is detected */ 1196 /* compute rs=sum of abs(off-diagonal) */ 1197 rs = 0.0; 1198 jmin = bi[k]+1; 1199 nz = bi[k+1] - jmin; 1200 if (nz){ 1201 bcol = bj + jmin; 1202 while (nz--){ 1203 rs += PetscAbsScalar(rtmp[*bcol++]); 1204 } 1205 } 1206 if (PetscAbsScalar(dk) <= zeropivot*rs && shift_nonzero){ 1207 /* force |diag(*B)| > zeropivot*rs */ 1208 if (!nshift){ 1209 shift_amount = shift_nonzero; 1210 } else { 1211 shift_amount *= 2.0; 1212 } 1213 chshift = PETSC_TRUE; 1214 nshift++; 1215 break; 1216 } else if (PetscRealPart(dk) <= zeropivot*rs && shift_pd){ 1217 /* calculate a shift that would make this row diagonally dominant */ 1218 shift_amount = PetscMax(rs+PetscAbs(PetscRealPart(dk)),1.1*shift_amount); 1219 chshift = PETSC_TRUE; 1220 /* Unlike in the ILU case there is no exit condition on nshift: 1221 we increase the shift until it converges. There is no guarantee that 1222 this algorithm converges faster or slower, or is better or worse 1223 than the ILU algorithm. */ 1224 nshift++; 1225 break; 1226 } else if (PetscAbsScalar(dk) <= zeropivot*rs){ 1227 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1228 } 1229 1230 /* copy data into U(k,:) */ 1231 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1232 jmin = bi[k]+1; jmax = bi[k+1]; 1233 if (jmin < jmax) { 1234 for (j=jmin; j<jmax; j++){ 1235 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1236 } 1237 /* add the k-th row into il and jl */ 1238 il[k] = jmin; 1239 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1240 } 1241 } 1242 } while (chshift); 1243 ierr = PetscFree(il);CHKERRQ(ierr); 1244 1245 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1246 C->factor = FACTOR_CHOLESKY; 1247 C->assembled = PETSC_TRUE; 1248 C->preallocated = PETSC_TRUE; 1249 PetscLogFlops(C->m); 1250 if (nshift){ 1251 if (shift_nonzero) { 1252 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",nshift,shift_amount); 1253 } else if (shift_pd) { 1254 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g\n",nshift,shift_amount); 1255 } 1256 } 1257 PetscFunctionReturn(0); 1258 } 1259 1260 #include "petscbt.h" 1261 #include "src/mat/utils/freespace.h" 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1264 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1265 { 1266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1267 Mat_SeqSBAIJ *b; 1268 Mat B; 1269 PetscErrorCode ierr; 1270 PetscTruth perm_identity; 1271 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1272 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1273 PetscInt nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1274 PetscReal fill=info->fill,levels=info->levels; 1275 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1276 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1277 PetscBT lnkbt; 1278 1279 PetscFunctionBegin; 1280 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1281 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1282 1283 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1284 ui[0] = 0; 1285 1286 /* special case that simply copies fill pattern */ 1287 if (!levels && perm_identity) { 1288 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1289 for (i=0; i<am; i++) { 1290 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1291 } 1292 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1293 cols = uj; 1294 for (i=0; i<am; i++) { 1295 aj = a->j + a->diag[i]; 1296 ncols = ui[i+1] - ui[i]; 1297 for (j=0; j<ncols; j++) *cols++ = *aj++; 1298 } 1299 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1300 /* initialization */ 1301 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 1302 1303 /* jl: linked list for storing indices of the pivot rows 1304 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1305 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1306 il = jl + am; 1307 uj_ptr = (PetscInt**)(il + am); 1308 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1309 for (i=0; i<am; i++){ 1310 jl[i] = am; il[i] = 0; 1311 } 1312 1313 /* create and initialize a linked list for storing column indices of the active row k */ 1314 nlnk = am + 1; 1315 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1316 1317 /* initial FreeSpace size is fill*(ai[am]+1) */ 1318 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1319 current_space = free_space; 1320 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1321 current_space_lvl = free_space_lvl; 1322 1323 for (k=0; k<am; k++){ /* for each active row k */ 1324 /* initialize lnk by the column indices of row rip[k] of A */ 1325 nzk = 0; 1326 ncols = ai[rip[k]+1] - ai[rip[k]]; 1327 ncols_upper = 0; 1328 cols = cols_lvl + am; 1329 for (j=0; j<ncols; j++){ 1330 i = rip[*(aj + ai[rip[k]] + j)]; 1331 if (i >= k){ /* only take upper triangular entry */ 1332 cols[ncols_upper] = i; 1333 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 1334 ncols_upper++; 1335 } 1336 } 1337 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1338 nzk += nlnk; 1339 1340 /* update lnk by computing fill-in for each pivot row to be merged in */ 1341 prow = jl[k]; /* 1st pivot row */ 1342 1343 while (prow < k){ 1344 nextprow = jl[prow]; 1345 1346 /* merge prow into k-th row */ 1347 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1348 jmax = ui[prow+1]; 1349 ncols = jmax-jmin; 1350 i = jmin - ui[prow]; 1351 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1352 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1353 ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1354 nzk += nlnk; 1355 1356 /* update il and jl for prow */ 1357 if (jmin < jmax){ 1358 il[prow] = jmin; 1359 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1360 } 1361 prow = nextprow; 1362 } 1363 1364 /* if free space is not available, make more free space */ 1365 if (current_space->local_remaining<nzk) { 1366 i = am - k + 1; /* num of unfactored rows */ 1367 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1368 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1369 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1370 reallocs++; 1371 } 1372 1373 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1374 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1375 1376 /* add the k-th row into il and jl */ 1377 if (nzk > 1){ 1378 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1379 jl[k] = jl[i]; jl[i] = k; 1380 il[k] = ui[k] + 1; 1381 } 1382 uj_ptr[k] = current_space->array; 1383 uj_lvl_ptr[k] = current_space_lvl->array; 1384 1385 current_space->array += nzk; 1386 current_space->local_used += nzk; 1387 current_space->local_remaining -= nzk; 1388 1389 current_space_lvl->array += nzk; 1390 current_space_lvl->local_used += nzk; 1391 current_space_lvl->local_remaining -= nzk; 1392 1393 ui[k+1] = ui[k] + nzk; 1394 } 1395 1396 if (ai[am] != 0) { 1397 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1398 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1399 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1400 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1401 } else { 1402 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1403 } 1404 1405 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1406 ierr = PetscFree(jl);CHKERRQ(ierr); 1407 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1408 1409 /* destroy list of free space and other temporary array(s) */ 1410 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1411 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1412 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1413 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1414 1415 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1416 1417 /* put together the new matrix in MATSEQSBAIJ format */ 1418 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1419 B = *fact; 1420 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1421 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1422 1423 b = (Mat_SeqSBAIJ*)B->data; 1424 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1425 b->singlemalloc = PETSC_FALSE; 1426 /* the next line frees the default space generated by the Create() */ 1427 ierr = PetscFree(b->a);CHKERRQ(ierr); 1428 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1429 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1430 b->j = uj; 1431 b->i = ui; 1432 b->diag = 0; 1433 b->ilen = 0; 1434 b->imax = 0; 1435 b->row = perm; 1436 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1437 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1438 b->icol = perm; 1439 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1440 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1441 PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 1442 b->maxnz = b->nz = ui[am]; 1443 1444 B->factor = FACTOR_CHOLESKY; 1445 B->info.factor_mallocs = reallocs; 1446 B->info.fill_ratio_given = fill; 1447 if (ai[am] != 0) { 1448 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1449 } else { 1450 B->info.fill_ratio_needed = 0.0; 1451 } 1452 /* 1453 b->factor_zeropivot = info->zeropivot; 1454 b->factor_damping = info->damping; 1455 b->factor_shift = info->shift; 1456 */ 1457 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1458 if (perm_identity){ 1459 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1460 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1461 } 1462 PetscFunctionReturn(0); 1463 } 1464 1465 #undef __FUNCT__ 1466 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1467 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1468 { 1469 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1470 Mat_SeqSBAIJ *b; 1471 Mat B; 1472 PetscErrorCode ierr; 1473 PetscTruth perm_identity; 1474 PetscReal fill = info->fill; 1475 PetscInt *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1476 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1477 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1478 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1479 PetscBT lnkbt; 1480 IS iperm; 1481 1482 PetscFunctionBegin; 1483 /* check whether perm is the identity mapping */ 1484 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1485 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1486 1487 if (!perm_identity){ 1488 /* check if perm is symmetric! */ 1489 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1490 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1491 for (i=0; i<mbs; i++) { 1492 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1493 } 1494 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1495 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1496 } 1497 1498 /* initialization */ 1499 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1500 ui[0] = 0; 1501 1502 /* jl: linked list for storing indices of the pivot rows 1503 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 1504 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1505 il = jl + mbs; 1506 cols = il + mbs; 1507 ui_ptr = (PetscInt**)(cols + mbs); 1508 for (i=0; i<mbs; i++){ 1509 jl[i] = mbs; il[i] = 0; 1510 } 1511 1512 /* create and initialize a linked list for storing column indices of the active row k */ 1513 nlnk = mbs + 1; 1514 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1515 1516 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 1517 ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 1518 current_space = free_space; 1519 1520 for (k=0; k<mbs; k++){ /* for each active row k */ 1521 /* initialize lnk by the column indices of row rip[k] of A */ 1522 nzk = 0; 1523 ncols = ai[rip[k]+1] - ai[rip[k]]; 1524 ncols_upper = 0; 1525 for (j=0; j<ncols; j++){ 1526 i = rip[*(aj + ai[rip[k]] + j)]; 1527 if (i >= k){ /* only take upper triangular entry */ 1528 cols[ncols_upper] = i; 1529 ncols_upper++; 1530 } 1531 } 1532 ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1533 nzk += nlnk; 1534 1535 /* update lnk by computing fill-in for each pivot row to be merged in */ 1536 prow = jl[k]; /* 1st pivot row */ 1537 1538 while (prow < k){ 1539 nextprow = jl[prow]; 1540 /* merge prow into k-th row */ 1541 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 1542 jmax = ui[prow+1]; 1543 ncols = jmax-jmin; 1544 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 1545 ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1546 nzk += nlnk; 1547 1548 /* update il and jl for prow */ 1549 if (jmin < jmax){ 1550 il[prow] = jmin; 1551 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1552 } 1553 prow = nextprow; 1554 } 1555 1556 /* if free space is not available, make more free space */ 1557 if (current_space->local_remaining<nzk) { 1558 i = mbs - k + 1; /* num of unfactored rows */ 1559 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1560 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1561 reallocs++; 1562 } 1563 1564 /* copy data into free space, then initialize lnk */ 1565 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1566 1567 /* add the k-th row into il and jl */ 1568 if (nzk-1 > 0){ 1569 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1570 jl[k] = jl[i]; jl[i] = k; 1571 il[k] = ui[k] + 1; 1572 } 1573 ui_ptr[k] = current_space->array; 1574 current_space->array += nzk; 1575 current_space->local_used += nzk; 1576 current_space->local_remaining -= nzk; 1577 1578 ui[k+1] = ui[k] + nzk; 1579 } 1580 1581 if (ai[mbs] != 0) { 1582 PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]); 1583 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1584 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1585 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1586 } else { 1587 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1588 } 1589 1590 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1591 ierr = PetscFree(jl);CHKERRQ(ierr); 1592 1593 /* destroy list of free space and other temporary array(s) */ 1594 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1595 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1596 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1597 1598 /* put together the new matrix in MATSEQSBAIJ format */ 1599 ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr); 1600 B = *fact; 1601 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1602 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1603 1604 b = (Mat_SeqSBAIJ*)B->data; 1605 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1606 b->singlemalloc = PETSC_FALSE; 1607 /* the next line frees the default space generated by the Create() */ 1608 ierr = PetscFree(b->a);CHKERRQ(ierr); 1609 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1610 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1611 b->j = uj; 1612 b->i = ui; 1613 b->diag = 0; 1614 b->ilen = 0; 1615 b->imax = 0; 1616 b->row = perm; 1617 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1618 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1619 b->icol = perm; 1620 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1621 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1622 PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 1623 b->maxnz = b->nz = ui[mbs]; 1624 1625 B->factor = FACTOR_CHOLESKY; 1626 B->info.factor_mallocs = reallocs; 1627 B->info.fill_ratio_given = fill; 1628 if (ai[mbs] != 0) { 1629 B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 1630 } else { 1631 B->info.fill_ratio_needed = 0.0; 1632 } 1633 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1634 if (perm_identity){ 1635 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1636 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1637 } 1638 PetscFunctionReturn(0); 1639 } 1640