1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 9 { 10 PetscFunctionBegin; 11 12 SETERRQ(PETSC_ERR_SUP,"Code not written"); 13 #if !defined(PETSC_USE_DEBUG) 14 PetscFunctionReturn(0); 15 #endif 16 } 17 18 19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 21 22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 28 /* ------------------------------------------------------------ 29 30 This interface was contribed by Tony Caola 31 32 This routine is an interface to the pivoting drop-tolerance 33 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 34 SPARSEKIT2. 35 36 The SPARSEKIT2 routines used here are covered by the GNU 37 copyright; see the file gnu in this directory. 38 39 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 40 help in getting this routine ironed out. 41 42 The major drawback to this routine is that if info->fill is 43 not large enough it fails rather than allocating more space; 44 this can be fixed by hacking/improving the f2c version of 45 Yousef Saad's code. 46 47 ------------------------------------------------------------ 48 */ 49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 50 { 51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 52 PetscFunctionBegin; 53 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 54 You can obtain the drop tolerance routines by installing PETSc from\n\ 55 www.mcs.anl.gov/petsc\n"); 56 #else 57 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 58 IS iscolf,isicol,isirow; 59 PetscTruth reorder; 60 PetscErrorCode ierr,sierr; 61 PetscInt *c,*r,*ic,i,n = A->m; 62 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63 PetscInt *ordcol,*iwk,*iperm,*jw; 64 PetscInt jmax,lfill,job,*o_i,*o_j; 65 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66 PetscReal af; 67 68 PetscFunctionBegin; 69 70 if (info->dt == PETSC_DEFAULT) info->dt = .005; 71 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 72 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 73 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 74 lfill = (PetscInt)(info->dtcount/2.0); 75 jmax = (PetscInt)(info->fill*a->nz); 76 77 78 /* ------------------------------------------------------------ 79 If reorder=.TRUE., then the original matrix has to be 80 reordered to reflect the user selected ordering scheme, and 81 then de-reordered so it is in it's original format. 82 Because Saad's dperm() is NOT in place, we have to copy 83 the original matrix and allocate more storage. . . 84 ------------------------------------------------------------ 85 */ 86 87 /* set reorder to true if either isrow or iscol is not identity */ 88 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 89 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 90 reorder = PetscNot(reorder); 91 92 93 /* storage for ilu factor */ 94 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 95 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 97 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 for (i=old_i[0];i<old_i[n];i++) { 104 old_j[i]++; 105 } 106 for(i=0;i<n+1;i++) { 107 old_i[i]++; 108 }; 109 110 111 if (reorder) { 112 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 113 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 114 for(i=0;i<n;i++) { 115 r[i] = r[i]+1; 116 c[i] = c[i]+1; 117 } 118 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 119 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 121 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 122 for (i=0;i<n;i++) { 123 r[i] = r[i]-1; 124 c[i] = c[i]-1; 125 } 126 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 127 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 128 o_a = old_a2; 129 o_j = old_j2; 130 o_i = old_i2; 131 } else { 132 o_a = old_a; 133 o_j = old_j; 134 o_i = old_i; 135 } 136 137 /* ------------------------------------------------------------ 138 Call Saad's ilutp() routine to generate the factorization 139 ------------------------------------------------------------ 140 */ 141 142 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 144 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 145 146 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 147 if (sierr) { 148 switch (sierr) { 149 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 150 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 152 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 153 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 154 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 155 } 156 } 157 158 ierr = PetscFree(w);CHKERRQ(ierr); 159 ierr = PetscFree(jw);CHKERRQ(ierr); 160 161 /* ------------------------------------------------------------ 162 Saad's routine gives the result in Modified Sparse Row (msr) 163 Convert to Compressed Sparse Row format (csr) 164 ------------------------------------------------------------ 165 */ 166 167 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 168 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 169 170 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 171 172 ierr = PetscFree(iwk);CHKERRQ(ierr); 173 ierr = PetscFree(wk);CHKERRQ(ierr); 174 175 if (reorder) { 176 ierr = PetscFree(old_a2);CHKERRQ(ierr); 177 ierr = PetscFree(old_j2);CHKERRQ(ierr); 178 ierr = PetscFree(old_i2);CHKERRQ(ierr); 179 } else { 180 /* fix permutation of old_j that the factorization introduced */ 181 for (i=old_i[0]; i<old_i[n]; i++) { 182 old_j[i-1] = iperm[old_j[i-1]-1]; 183 } 184 } 185 186 /* get rid of the shift to indices starting at 1 */ 187 for (i=0; i<n+1; i++) { 188 old_i[i]--; 189 } 190 for (i=old_i[0];i<old_i[n];i++) { 191 old_j[i]--; 192 } 193 194 /* Make the factored matrix 0-based */ 195 for (i=0; i<n+1; i++) { 196 new_i[i]--; 197 } 198 for (i=new_i[0];i<new_i[n];i++) { 199 new_j[i]--; 200 } 201 202 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 203 /*-- permute the right-hand-side and solution vectors --*/ 204 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 205 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 206 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 207 for(i=0; i<n; i++) { 208 ordcol[i] = ic[iperm[i]-1]; 209 }; 210 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 211 ierr = ISDestroy(isicol);CHKERRQ(ierr); 212 213 ierr = PetscFree(iperm);CHKERRQ(ierr); 214 215 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 216 ierr = PetscFree(ordcol);CHKERRQ(ierr); 217 218 /*----- put together the new matrix -----*/ 219 220 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 221 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 222 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 223 (*fact)->factor = FACTOR_LU; 224 (*fact)->assembled = PETSC_TRUE; 225 226 b = (Mat_SeqAIJ*)(*fact)->data; 227 ierr = PetscFree(b->imax);CHKERRQ(ierr); 228 b->sorted = PETSC_FALSE; 229 b->singlemalloc = PETSC_FALSE; 230 /* the next line frees the default space generated by the MatCreate() */ 231 ierr = PetscFree(b->a);CHKERRQ(ierr); 232 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 247 248 /* check out for identical nodes. If found, use inode functions */ 249 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 250 251 af = ((double)b->nz)/((double)a->nz) + .001; 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 /* 262 Factorization code for AIJ format. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 267 { 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 269 IS isicol; 270 PetscErrorCode ierr; 271 PetscInt *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j; 272 PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 273 PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im; 274 PetscReal f; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 284 ainew[0] = 0; 285 /* don't know how many column pointers are needed so estimate */ 286 f = info->fill; 287 jmax = (PetscInt)(f*ai[n]+1); 288 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 289 /* fill is a linked list of nonzeros in active row */ 290 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 291 im = fill + n + 1; 292 /* idnew is location of diagonal in factor */ 293 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr); 294 idnew[0] = 0; 295 296 for (i=0; i<n; i++) { 297 /* first copy previous fill into linked list */ 298 nnz = nz = ai[r[i]+1] - ai[r[i]]; 299 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 300 ajtmp = aj + ai[r[i]]; 301 fill[n] = n; 302 while (nz--) { 303 fm = n; 304 idx = ic[*ajtmp++]; 305 do { 306 m = fm; 307 fm = fill[m]; 308 } while (fm < idx); 309 fill[m] = idx; 310 fill[idx] = fm; 311 } 312 row = fill[n]; 313 while (row < i) { 314 ajtmp = ajnew + idnew[row] + 1; 315 nzbd = 1 + idnew[row] - ainew[row]; 316 nz = im[row] - nzbd; 317 fm = row; 318 while (nz-- > 0) { 319 idx = *ajtmp++ ; 320 nzbd++; 321 if (idx == i) im[row] = nzbd; 322 do { 323 m = fm; 324 fm = fill[m]; 325 } while (fm < idx); 326 if (fm != idx) { 327 fill[m] = idx; 328 fill[idx] = fm; 329 fm = idx; 330 nnz++; 331 } 332 } 333 row = fill[row]; 334 } 335 /* copy new filled row into permanent storage */ 336 ainew[i+1] = ainew[i] + nnz; 337 if (ainew[i+1] > jmax) { 338 339 /* estimate how much additional space we will need */ 340 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 341 /* just double the memory each time */ 342 PetscInt maxadd = jmax; 343 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 344 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 345 jmax += maxadd; 346 347 /* allocate a longer ajnew */ 348 ierr = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 349 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr); 350 ierr = PetscFree(ajnew);CHKERRQ(ierr); 351 ajnew = ajtmp; 352 reallocs++; /* count how many times we realloc */ 353 } 354 ajtmp = ajnew + ainew[i]; 355 fm = fill[n]; 356 nzi = 0; 357 im[i] = nnz; 358 while (nnz--) { 359 if (fm < i) nzi++; 360 *ajtmp++ = fm ; 361 fm = fill[fm]; 362 } 363 idnew[i] = ainew[i] + nzi; 364 } 365 if (ai[n] != 0) { 366 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 370 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 371 } else { 372 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 373 } 374 375 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 376 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 377 378 ierr = PetscFree(fill);CHKERRQ(ierr); 379 380 /* put together the new matrix */ 381 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 382 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 383 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 384 PetscLogObjectParent(*B,isicol); 385 b = (Mat_SeqAIJ*)(*B)->data; 386 ierr = PetscFree(b->imax);CHKERRQ(ierr); 387 b->singlemalloc = PETSC_FALSE; 388 /* the next line frees the default space generated by the Create() */ 389 ierr = PetscFree(b->a);CHKERRQ(ierr); 390 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 391 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 392 b->j = ajnew; 393 b->i = ainew; 394 b->diag = idnew; 395 b->ilen = 0; 396 b->imax = 0; 397 b->row = isrow; 398 b->col = iscol; 399 b->lu_damping = info->damping; 400 b->lu_zeropivot = info->zeropivot; 401 b->lu_shift = info->shift; 402 b->lu_shift_fraction = info->shift_fraction; 403 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 404 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 405 b->icol = isicol; 406 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 407 /* In b structure: Free imax, ilen, old a, old j. 408 Allocate idnew, solve_work, new a, new j */ 409 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar))); 410 b->maxnz = b->nz = ainew[n] ; 411 412 (*B)->factor = FACTOR_LU; 413 (*B)->info.factor_mallocs = reallocs; 414 (*B)->info.fill_ratio_given = f; 415 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 416 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 417 418 if (ai[n] != 0) { 419 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 420 } else { 421 (*B)->info.fill_ratio_needed = 0.0; 422 } 423 PetscFunctionReturn(0); 424 } 425 /* ----------------------------------------------------------- */ 426 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 430 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 431 { 432 Mat C=*B; 433 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 434 IS isrow = b->row,isicol = b->icol; 435 PetscErrorCode ierr; 436 PetscInt *r,*ic,i,j,n=A->m,*ai=b->i,*aj=b->j; 437 PetscInt *ajtmpold,*ajtmp,nz,row,*ics; 438 PetscInt *diag_offset = b->diag,diag,*pj,nshift=0; 439 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 440 PetscReal zeropivot=b->lu_zeropivot,rs,d; 441 PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0.,shift_hi=1.,shift_top=0.; 442 PetscTruth lushift; 443 PetscReal shift_pd=b->lu_shift,shift_nonzero=b->lu_damping; 444 445 PetscFunctionBegin; 446 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 447 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 448 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 449 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 450 rtmps = rtmp; ics = ic; 451 452 if (!a->diag) { 453 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 454 } 455 /* if both shift schemes are chosen by user, only use shift_pd */ 456 if (shift_pd && shift_nonzero) shift_nonzero = 0.0; 457 if (shift_pd) { /* set shift_top=max{row_shift} */ 458 PetscInt *aai = a->i,*ddiag = a->diag; 459 shift_top = 0; 460 for (i=0; i<n; i++) { 461 d = PetscAbsScalar((a->a)[ddiag[i]]); 462 /* calculate amt of shift needed for this row */ 463 if (d<=0) { 464 row_shift = 0; 465 } else { 466 row_shift = -2*d; 467 } 468 v = a->a+aai[i]; 469 nz = aai[i+1] - aai[i]; 470 for (j=0; j<nz; j++) 471 row_shift += PetscAbsScalar(v[j]); 472 if (row_shift>shift_top) shift_top = row_shift; 473 } 474 } 475 476 shift_fraction = b->lu_shift_fraction; 477 shift_amount = 0; 478 do { 479 lushift = PETSC_FALSE; 480 for (i=0; i<n; i++){ 481 nz = ai[i+1] - ai[i]; 482 ajtmp = aj + ai[i]; 483 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 484 485 /* load in initial (unfactored row) */ 486 nz = a->i[r[i]+1] - a->i[r[i]]; 487 ajtmpold = a->j + a->i[r[i]]; 488 v = a->a + a->i[r[i]]; 489 for (j=0; j<nz; j++) { 490 rtmp[ics[ajtmpold[j]]] = v[j]; 491 } 492 rtmp[ics[r[i]]] += shift_amount; /* shift the diagonal of the matrix */ 493 494 row = *ajtmp++; 495 while (row < i) { 496 pc = rtmp + row; 497 if (*pc != 0.0) { 498 pv = b->a + diag_offset[row]; 499 pj = b->j + diag_offset[row] + 1; 500 multiplier = *pc / *pv++; 501 *pc = multiplier; 502 nz = ai[row+1] - diag_offset[row] - 1; 503 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 504 PetscLogFlops(2*nz); 505 } 506 row = *ajtmp++; 507 } 508 /* finished row so stick it into b->a */ 509 pv = b->a + ai[i] ; 510 pj = b->j + ai[i] ; 511 nz = ai[i+1] - ai[i]; 512 diag = diag_offset[i] - ai[i]; 513 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 514 rs = 0.0; 515 for (j=0; j<nz; j++) { 516 pv[j] = rtmps[pj[j]]; 517 if (j != diag) rs += PetscAbsScalar(pv[j]); 518 } 519 /* shift the diagonals when zero pivot is detected */ 520 #define MAX_NSHIFT 5 521 if (PetscAbsScalar(pv[diag]) <= zeropivot*rs && shift_nonzero){ 522 /* force |diag(*B)| > zeropivot*rs */ 523 if (!nshift){ 524 shift_amount = shift_nonzero; 525 } else { 526 shift_amount *= 2.0; 527 } 528 lushift = PETSC_TRUE; 529 nshift++; 530 break; 531 } else if (PetscRealPart(pv[diag]) <= zeropivot*rs && shift_pd){ 532 /* force *B to be diagonally dominant */ 533 if (nshift>MAX_NSHIFT) { 534 SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner"); 535 } else if (nshift==MAX_NSHIFT) { 536 shift_fraction = shift_hi; 537 lushift = PETSC_FALSE; 538 } else { 539 shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 540 lushift = PETSC_TRUE; 541 } 542 shift_amount = shift_fraction * shift_top; 543 nshift++; 544 break; 545 } else if (PetscAbsScalar(pv[diag]) <= zeropivot*rs){ 546 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 547 } 548 } 549 if (shift_pd && !lushift && shift_fraction>0 && nshift<MAX_NSHIFT) { 550 /* 551 * if no shift in this attempt & shifting & started shifting & can refine, 552 * then try lower shift 553 */ 554 shift_hi = shift_fraction; 555 shift_fraction = (shift_hi+shift_lo)/2.; 556 shift_amount = shift_fraction * shift_top; 557 lushift = PETSC_TRUE; 558 nshift++; 559 } 560 } while (lushift); 561 562 /* invert diagonal entries for simplier triangular solves */ 563 for (i=0; i<n; i++) { 564 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 565 } 566 567 ierr = PetscFree(rtmp);CHKERRQ(ierr); 568 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 569 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 570 C->factor = FACTOR_LU; 571 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 572 C->assembled = PETSC_TRUE; 573 PetscLogFlops(C->n); 574 if (nshift){ 575 if (shift_nonzero) { 576 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nonzero tries %D, shift_ amount%g\n",nshift,shift_amount); 577 } else if (shift_pd) { 578 b->lu_shift_fraction = shift_fraction; 579 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift); 580 } 581 } 582 PetscFunctionReturn(0); 583 } 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 587 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 588 { 589 PetscFunctionBegin; 590 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 591 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 592 PetscFunctionReturn(0); 593 } 594 595 596 /* ----------------------------------------------------------- */ 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatLUFactor_SeqAIJ" 599 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 600 { 601 PetscErrorCode ierr; 602 Mat C; 603 604 PetscFunctionBegin; 605 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 606 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 607 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 608 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 609 PetscFunctionReturn(0); 610 } 611 /* ----------------------------------------------------------- */ 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatSolve_SeqAIJ" 614 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 615 { 616 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 617 IS iscol = a->col,isrow = a->row; 618 PetscErrorCode ierr; 619 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 620 PetscInt nz,*rout,*cout; 621 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 622 623 PetscFunctionBegin; 624 if (!n) PetscFunctionReturn(0); 625 626 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 627 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 628 tmp = a->solve_work; 629 630 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 631 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 632 633 /* forward solve the lower triangular */ 634 tmp[0] = b[*r++]; 635 tmps = tmp; 636 for (i=1; i<n; i++) { 637 v = aa + ai[i] ; 638 vi = aj + ai[i] ; 639 nz = a->diag[i] - ai[i]; 640 sum = b[*r++]; 641 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 642 tmp[i] = sum; 643 } 644 645 /* backward solve the upper triangular */ 646 for (i=n-1; i>=0; i--){ 647 v = aa + a->diag[i] + 1; 648 vi = aj + a->diag[i] + 1; 649 nz = ai[i+1] - a->diag[i] - 1; 650 sum = tmp[i]; 651 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 652 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 653 } 654 655 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 656 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 657 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 658 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 659 PetscLogFlops(2*a->nz - A->n); 660 PetscFunctionReturn(0); 661 } 662 663 /* ----------------------------------------------------------- */ 664 #undef __FUNCT__ 665 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 666 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 667 { 668 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 669 PetscErrorCode ierr; 670 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 671 PetscScalar *x,*b,*aa = a->a; 672 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 673 PetscInt adiag_i,i,*vi,nz,ai_i; 674 PetscScalar *v,sum; 675 #endif 676 677 PetscFunctionBegin; 678 if (!n) PetscFunctionReturn(0); 679 680 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 681 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 682 683 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 684 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 685 #else 686 /* forward solve the lower triangular */ 687 x[0] = b[0]; 688 for (i=1; i<n; i++) { 689 ai_i = ai[i]; 690 v = aa + ai_i; 691 vi = aj + ai_i; 692 nz = adiag[i] - ai_i; 693 sum = b[i]; 694 while (nz--) sum -= *v++ * x[*vi++]; 695 x[i] = sum; 696 } 697 698 /* backward solve the upper triangular */ 699 for (i=n-1; i>=0; i--){ 700 adiag_i = adiag[i]; 701 v = aa + adiag_i + 1; 702 vi = aj + adiag_i + 1; 703 nz = ai[i+1] - adiag_i - 1; 704 sum = x[i]; 705 while (nz--) sum -= *v++ * x[*vi++]; 706 x[i] = sum*aa[adiag_i]; 707 } 708 #endif 709 PetscLogFlops(2*a->nz - A->n); 710 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 711 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 717 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 718 { 719 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 720 IS iscol = a->col,isrow = a->row; 721 PetscErrorCode ierr; 722 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 723 PetscInt nz,*rout,*cout; 724 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 725 726 PetscFunctionBegin; 727 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 728 729 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 730 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 731 tmp = a->solve_work; 732 733 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 734 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 735 736 /* forward solve the lower triangular */ 737 tmp[0] = b[*r++]; 738 for (i=1; i<n; i++) { 739 v = aa + ai[i] ; 740 vi = aj + ai[i] ; 741 nz = a->diag[i] - ai[i]; 742 sum = b[*r++]; 743 while (nz--) sum -= *v++ * tmp[*vi++ ]; 744 tmp[i] = sum; 745 } 746 747 /* backward solve the upper triangular */ 748 for (i=n-1; i>=0; i--){ 749 v = aa + a->diag[i] + 1; 750 vi = aj + a->diag[i] + 1; 751 nz = ai[i+1] - a->diag[i] - 1; 752 sum = tmp[i]; 753 while (nz--) sum -= *v++ * tmp[*vi++ ]; 754 tmp[i] = sum*aa[a->diag[i]]; 755 x[*c--] += tmp[i]; 756 } 757 758 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 759 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 760 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 761 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 762 PetscLogFlops(2*a->nz); 763 764 PetscFunctionReturn(0); 765 } 766 /* -------------------------------------------------------------------*/ 767 #undef __FUNCT__ 768 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 769 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 770 { 771 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 772 IS iscol = a->col,isrow = a->row; 773 PetscErrorCode ierr; 774 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 775 PetscInt nz,*rout,*cout,*diag = a->diag; 776 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 777 778 PetscFunctionBegin; 779 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 780 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 781 tmp = a->solve_work; 782 783 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 784 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 785 786 /* copy the b into temp work space according to permutation */ 787 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 788 789 /* forward solve the U^T */ 790 for (i=0; i<n; i++) { 791 v = aa + diag[i] ; 792 vi = aj + diag[i] + 1; 793 nz = ai[i+1] - diag[i] - 1; 794 s1 = tmp[i]; 795 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 796 while (nz--) { 797 tmp[*vi++ ] -= (*v++)*s1; 798 } 799 tmp[i] = s1; 800 } 801 802 /* backward solve the L^T */ 803 for (i=n-1; i>=0; i--){ 804 v = aa + diag[i] - 1 ; 805 vi = aj + diag[i] - 1 ; 806 nz = diag[i] - ai[i]; 807 s1 = tmp[i]; 808 while (nz--) { 809 tmp[*vi-- ] -= (*v--)*s1; 810 } 811 } 812 813 /* copy tmp into x according to permutation */ 814 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 815 816 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 817 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 818 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 819 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 820 821 PetscLogFlops(2*a->nz-A->n); 822 PetscFunctionReturn(0); 823 } 824 825 #undef __FUNCT__ 826 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 827 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 828 { 829 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 830 IS iscol = a->col,isrow = a->row; 831 PetscErrorCode ierr; 832 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 833 PetscInt nz,*rout,*cout,*diag = a->diag; 834 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 835 836 PetscFunctionBegin; 837 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 838 839 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 840 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 841 tmp = a->solve_work; 842 843 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 844 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 845 846 /* copy the b into temp work space according to permutation */ 847 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 848 849 /* forward solve the U^T */ 850 for (i=0; i<n; i++) { 851 v = aa + diag[i] ; 852 vi = aj + diag[i] + 1; 853 nz = ai[i+1] - diag[i] - 1; 854 tmp[i] *= *v++; 855 while (nz--) { 856 tmp[*vi++ ] -= (*v++)*tmp[i]; 857 } 858 } 859 860 /* backward solve the L^T */ 861 for (i=n-1; i>=0; i--){ 862 v = aa + diag[i] - 1 ; 863 vi = aj + diag[i] - 1 ; 864 nz = diag[i] - ai[i]; 865 while (nz--) { 866 tmp[*vi-- ] -= (*v--)*tmp[i]; 867 } 868 } 869 870 /* copy tmp into x according to permutation */ 871 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 872 873 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 874 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 875 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 876 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 877 878 PetscLogFlops(2*a->nz); 879 PetscFunctionReturn(0); 880 } 881 /* ----------------------------------------------------------------*/ 882 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 883 884 #undef __FUNCT__ 885 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 886 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 887 { 888 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 889 IS isicol; 890 PetscErrorCode ierr; 891 PetscInt *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j; 892 PetscInt *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 893 PetscInt *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0; 894 PetscInt incrlev,nnz,i,levels,diagonal_fill; 895 PetscTruth col_identity,row_identity; 896 PetscReal f; 897 898 PetscFunctionBegin; 899 f = info->fill; 900 levels = (PetscInt)info->levels; 901 diagonal_fill = (PetscInt)info->diagonal_fill; 902 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 903 904 /* special case that simply copies fill pattern */ 905 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 906 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 907 if (!levels && row_identity && col_identity) { 908 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 909 (*fact)->factor = FACTOR_LU; 910 b = (Mat_SeqAIJ*)(*fact)->data; 911 if (!b->diag) { 912 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 913 } 914 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 915 b->row = isrow; 916 b->col = iscol; 917 b->icol = isicol; 918 b->lu_damping = info->damping; 919 b->lu_zeropivot = info->zeropivot; 920 b->lu_shift = info->shift; 921 b->lu_shift_fraction= info->shift_fraction; 922 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 923 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 924 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 925 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 926 PetscFunctionReturn(0); 927 } 928 929 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 930 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 931 932 /* get new row pointers */ 933 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 934 ainew[0] = 0; 935 /* don't know how many column pointers are needed so estimate */ 936 jmax = (PetscInt)(f*(ai[n]+1)); 937 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 938 /* ajfill is level of fill for each fill entry */ 939 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr); 940 /* fill is a linked list of nonzeros in active row */ 941 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 942 /* im is level for each filled value */ 943 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 944 /* dloc is location of diagonal in factor */ 945 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr); 946 dloc[0] = 0; 947 for (prow=0; prow<n; prow++) { 948 949 /* copy current row into linked list */ 950 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 951 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 952 xi = aj + ai[r[prow]]; 953 fill[n] = n; 954 fill[prow] = -1; /* marker to indicate if diagonal exists */ 955 while (nz--) { 956 fm = n; 957 idx = ic[*xi++ ]; 958 do { 959 m = fm; 960 fm = fill[m]; 961 } while (fm < idx); 962 fill[m] = idx; 963 fill[idx] = fm; 964 im[idx] = 0; 965 } 966 967 /* make sure diagonal entry is included */ 968 if (diagonal_fill && fill[prow] == -1) { 969 fm = n; 970 while (fill[fm] < prow) fm = fill[fm]; 971 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 972 fill[fm] = prow; 973 im[prow] = 0; 974 nzf++; 975 dcount++; 976 } 977 978 nzi = 0; 979 row = fill[n]; 980 while (row < prow) { 981 incrlev = im[row] + 1; 982 nz = dloc[row]; 983 xi = ajnew + ainew[row] + nz + 1; 984 flev = ajfill + ainew[row] + nz + 1; 985 nnz = ainew[row+1] - ainew[row] - nz - 1; 986 fm = row; 987 while (nnz-- > 0) { 988 idx = *xi++ ; 989 if (*flev + incrlev > levels) { 990 flev++; 991 continue; 992 } 993 do { 994 m = fm; 995 fm = fill[m]; 996 } while (fm < idx); 997 if (fm != idx) { 998 im[idx] = *flev + incrlev; 999 fill[m] = idx; 1000 fill[idx] = fm; 1001 fm = idx; 1002 nzf++; 1003 } else { 1004 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 1005 } 1006 flev++; 1007 } 1008 row = fill[row]; 1009 nzi++; 1010 } 1011 /* copy new filled row into permanent storage */ 1012 ainew[prow+1] = ainew[prow] + nzf; 1013 if (ainew[prow+1] > jmax) { 1014 1015 /* estimate how much additional space we will need */ 1016 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1017 /* just double the memory each time */ 1018 /* maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1019 PetscInt maxadd = jmax; 1020 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1021 jmax += maxadd; 1022 1023 /* allocate a longer ajnew and ajfill */ 1024 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1025 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1026 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1027 ajnew = xi; 1028 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1029 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1030 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1031 ajfill = xi; 1032 reallocs++; /* count how many times we realloc */ 1033 } 1034 xi = ajnew + ainew[prow] ; 1035 flev = ajfill + ainew[prow] ; 1036 dloc[prow] = nzi; 1037 fm = fill[n]; 1038 while (nzf--) { 1039 *xi++ = fm ; 1040 *flev++ = im[fm]; 1041 fm = fill[fm]; 1042 } 1043 /* make sure row has diagonal entry */ 1044 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1045 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1046 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1047 } 1048 } 1049 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1050 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1051 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1052 ierr = PetscFree(fill);CHKERRQ(ierr); 1053 ierr = PetscFree(im);CHKERRQ(ierr); 1054 1055 { 1056 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1057 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1058 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1059 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1060 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1061 if (diagonal_fill) { 1062 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1063 } 1064 } 1065 1066 /* put together the new matrix */ 1067 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1068 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1069 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1070 PetscLogObjectParent(*fact,isicol); 1071 b = (Mat_SeqAIJ*)(*fact)->data; 1072 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1073 b->singlemalloc = PETSC_FALSE; 1074 len = (ainew[n] )*sizeof(PetscScalar); 1075 /* the next line frees the default space generated by the Create() */ 1076 ierr = PetscFree(b->a);CHKERRQ(ierr); 1077 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1078 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1079 b->j = ajnew; 1080 b->i = ainew; 1081 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1082 b->diag = dloc; 1083 b->ilen = 0; 1084 b->imax = 0; 1085 b->row = isrow; 1086 b->col = iscol; 1087 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1088 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1089 b->icol = isicol; 1090 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1091 /* In b structure: Free imax, ilen, old a, old j. 1092 Allocate dloc, solve_work, new a, new j */ 1093 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar))); 1094 b->maxnz = b->nz = ainew[n] ; 1095 b->lu_damping = info->damping; 1096 b->lu_shift = info->shift; 1097 b->lu_shift_fraction = info->shift_fraction; 1098 b->lu_zeropivot = info->zeropivot; 1099 (*fact)->factor = FACTOR_LU; 1100 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1101 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1102 1103 (*fact)->info.factor_mallocs = reallocs; 1104 (*fact)->info.fill_ratio_given = f; 1105 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #include "src/mat/impls/sbaij/seq/sbaij.h" 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1112 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1113 { 1114 Mat C = *B; 1115 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1116 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1117 IS ip=b->row; 1118 PetscErrorCode ierr; 1119 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1120 PetscInt *ai=a->i,*aj=a->j; 1121 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1122 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1123 PetscReal zeropivot=b->factor_zeropivot,shift_amount,rs; 1124 PetscTruth chshift; 1125 PetscInt nshift=0; 1126 PetscReal shift_pd=b->factor_shift,shift_nonzero=b->factor_damping; 1127 1128 PetscFunctionBegin; 1129 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1130 1131 /* initialization */ 1132 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1133 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1134 jl = il + mbs; 1135 rtmp = (MatScalar*)(jl + mbs); 1136 1137 shift_amount = 0; 1138 do { 1139 chshift = PETSC_FALSE; 1140 for (i=0; i<mbs; i++) { 1141 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1142 } 1143 1144 for (k = 0; k<mbs; k++){ 1145 bval = ba + bi[k]; 1146 /* initialize k-th row by the perm[k]-th row of A */ 1147 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1148 for (j = jmin; j < jmax; j++){ 1149 col = rip[aj[j]]; 1150 if (col >= k){ /* only take upper triangular entry */ 1151 rtmp[col] = aa[j]; 1152 *bval++ = 0.0; /* for in-place factorization */ 1153 } 1154 } 1155 /* shift the diagonal of the matrix */ 1156 if (nshift) rtmp[k] += shift_amount; 1157 1158 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1159 dk = rtmp[k]; 1160 i = jl[k]; /* first row to be added to k_th row */ 1161 1162 while (i < k){ 1163 nexti = jl[i]; /* next row to be added to k_th row */ 1164 1165 /* compute multiplier, update diag(k) and U(i,k) */ 1166 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1167 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1168 dk += uikdi*ba[ili]; 1169 ba[ili] = uikdi; /* -U(i,k) */ 1170 1171 /* add multiple of row i to k-th row */ 1172 jmin = ili + 1; jmax = bi[i+1]; 1173 if (jmin < jmax){ 1174 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1175 /* update il and jl for row i */ 1176 il[i] = jmin; 1177 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1178 } 1179 i = nexti; 1180 } 1181 1182 /* shift the diagonals when zero pivot is detected */ 1183 /* compute rs=sum of abs(off-diagonal) */ 1184 rs = 0.0; 1185 jmin = bi[k]+1; 1186 nz = bi[k+1] - jmin; 1187 if (nz){ 1188 bcol = bj + jmin; 1189 while (nz--){ 1190 rs += PetscAbsScalar(rtmp[*bcol++]); 1191 } 1192 } 1193 if (PetscAbsScalar(dk) <= zeropivot*rs && shift_nonzero){ 1194 /* force |diag(*B)| > zeropivot*rs */ 1195 if (!nshift){ 1196 shift_amount = shift_nonzero; 1197 } else { 1198 shift_amount *= 2.0; 1199 } 1200 chshift = PETSC_TRUE; 1201 nshift++; 1202 break; 1203 } else if (PetscRealPart(dk) <= zeropivot*rs && shift_pd){ 1204 /* calculate a shift that would make this row diagonally dominant */ 1205 shift_amount = PetscMax(rs+PetscAbs(PetscRealPart(dk)),1.1*shift_amount); 1206 chshift = PETSC_TRUE; 1207 /* Unlike in the ILU case there is no exit condition on nshift: 1208 we increase the shift until it converges. There is no guarantee that 1209 this algorithm converges faster or slower, or is better or worse 1210 than the ILU algorithm. */ 1211 nshift++; 1212 break; 1213 } else if (PetscAbsScalar(dk) <= zeropivot*rs){ 1214 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1215 } 1216 1217 /* copy data into U(k,:) */ 1218 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1219 jmin = bi[k]+1; jmax = bi[k+1]; 1220 if (jmin < jmax) { 1221 for (j=jmin; j<jmax; j++){ 1222 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1223 } 1224 /* add the k-th row into il and jl */ 1225 il[k] = jmin; 1226 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1227 } 1228 } 1229 } while (chshift); 1230 ierr = PetscFree(il);CHKERRQ(ierr); 1231 1232 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1233 C->factor = FACTOR_CHOLESKY; 1234 C->assembled = PETSC_TRUE; 1235 C->preallocated = PETSC_TRUE; 1236 PetscLogFlops(C->m); 1237 if (nshift){ 1238 if (shift_nonzero) { 1239 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",nshift,shift_amount); 1240 } else if (shift_pd) { 1241 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g\n",nshift,shift_amount); 1242 } 1243 } 1244 PetscFunctionReturn(0); 1245 } 1246 1247 #include "petscbt.h" 1248 #include "src/mat/utils/freespace.h" 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1251 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1252 { 1253 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1254 Mat_SeqSBAIJ *b; 1255 Mat B; 1256 PetscErrorCode ierr; 1257 PetscTruth perm_identity; 1258 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1259 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1260 PetscInt nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1261 PetscReal fill=info->fill,levels=info->levels; 1262 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1263 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1264 PetscBT lnkbt; 1265 1266 PetscFunctionBegin; 1267 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1268 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1269 1270 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1271 ui[0] = 0; 1272 1273 /* special case that simply copies fill pattern */ 1274 if (!levels && perm_identity) { 1275 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1276 for (i=0; i<am; i++) { 1277 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1278 } 1279 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1280 cols = uj; 1281 for (i=0; i<am; i++) { 1282 aj = a->j + a->diag[i]; 1283 ncols = ui[i+1] - ui[i]; 1284 for (j=0; j<ncols; j++) *cols++ = *aj++; 1285 } 1286 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1287 /* initialization */ 1288 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 1289 1290 /* jl: linked list for storing indices of the pivot rows 1291 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1292 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1293 il = jl + am; 1294 uj_ptr = (PetscInt**)(il + am); 1295 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1296 for (i=0; i<am; i++){ 1297 jl[i] = am; il[i] = 0; 1298 } 1299 1300 /* create and initialize a linked list for storing column indices of the active row k */ 1301 nlnk = am + 1; 1302 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1303 1304 /* initial FreeSpace size is fill*(ai[am]+1) */ 1305 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1306 current_space = free_space; 1307 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1308 current_space_lvl = free_space_lvl; 1309 1310 for (k=0; k<am; k++){ /* for each active row k */ 1311 /* initialize lnk by the column indices of row rip[k] of A */ 1312 nzk = 0; 1313 ncols = ai[rip[k]+1] - ai[rip[k]]; 1314 ncols_upper = 0; 1315 cols = cols_lvl + am; 1316 for (j=0; j<ncols; j++){ 1317 i = rip[*(aj + ai[rip[k]] + j)]; 1318 if (i >= k){ /* only take upper triangular entry */ 1319 cols[ncols_upper] = i; 1320 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 1321 ncols_upper++; 1322 } 1323 } 1324 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1325 nzk += nlnk; 1326 1327 /* update lnk by computing fill-in for each pivot row to be merged in */ 1328 prow = jl[k]; /* 1st pivot row */ 1329 1330 while (prow < k){ 1331 nextprow = jl[prow]; 1332 1333 /* merge prow into k-th row */ 1334 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1335 jmax = ui[prow+1]; 1336 ncols = jmax-jmin; 1337 i = jmin - ui[prow]; 1338 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1339 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1340 ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1341 nzk += nlnk; 1342 1343 /* update il and jl for prow */ 1344 if (jmin < jmax){ 1345 il[prow] = jmin; 1346 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1347 } 1348 prow = nextprow; 1349 } 1350 1351 /* if free space is not available, make more free space */ 1352 if (current_space->local_remaining<nzk) { 1353 i = am - k + 1; /* num of unfactored rows */ 1354 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1355 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1356 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1357 reallocs++; 1358 } 1359 1360 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1361 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1362 1363 /* add the k-th row into il and jl */ 1364 if (nzk > 1){ 1365 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1366 jl[k] = jl[i]; jl[i] = k; 1367 il[k] = ui[k] + 1; 1368 } 1369 uj_ptr[k] = current_space->array; 1370 uj_lvl_ptr[k] = current_space_lvl->array; 1371 1372 current_space->array += nzk; 1373 current_space->local_used += nzk; 1374 current_space->local_remaining -= nzk; 1375 1376 current_space_lvl->array += nzk; 1377 current_space_lvl->local_used += nzk; 1378 current_space_lvl->local_remaining -= nzk; 1379 1380 ui[k+1] = ui[k] + nzk; 1381 } 1382 1383 if (ai[am] != 0) { 1384 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1385 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1386 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1387 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1388 } else { 1389 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1390 } 1391 1392 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1393 ierr = PetscFree(jl);CHKERRQ(ierr); 1394 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1395 1396 /* destroy list of free space and other temporary array(s) */ 1397 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1398 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1399 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1400 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1401 1402 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1403 1404 /* put together the new matrix in MATSEQSBAIJ format */ 1405 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1406 B = *fact; 1407 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1408 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1409 1410 b = (Mat_SeqSBAIJ*)B->data; 1411 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1412 b->singlemalloc = PETSC_FALSE; 1413 /* the next line frees the default space generated by the Create() */ 1414 ierr = PetscFree(b->a);CHKERRQ(ierr); 1415 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1416 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1417 b->j = uj; 1418 b->i = ui; 1419 b->diag = 0; 1420 b->ilen = 0; 1421 b->imax = 0; 1422 b->row = perm; 1423 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1424 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1425 b->icol = perm; 1426 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1427 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1428 PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 1429 b->maxnz = b->nz = ui[am]; 1430 1431 B->factor = FACTOR_CHOLESKY; 1432 B->info.factor_mallocs = reallocs; 1433 B->info.fill_ratio_given = fill; 1434 if (ai[am] != 0) { 1435 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1436 } else { 1437 B->info.fill_ratio_needed = 0.0; 1438 } 1439 1440 b->factor_zeropivot = info->zeropivot; 1441 b->factor_damping = info->damping; 1442 b->factor_shift = info->shift; 1443 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1444 if (perm_identity){ 1445 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1446 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1447 } 1448 PetscFunctionReturn(0); 1449 } 1450 1451 #undef __FUNCT__ 1452 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1453 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1454 { 1455 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1456 Mat_SeqSBAIJ *b; 1457 Mat B; 1458 PetscErrorCode ierr; 1459 PetscTruth perm_identity; 1460 PetscReal fill = info->fill; 1461 PetscInt *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1462 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1463 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1464 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1465 PetscBT lnkbt; 1466 IS iperm; 1467 1468 PetscFunctionBegin; 1469 /* check whether perm is the identity mapping */ 1470 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1471 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1472 1473 if (!perm_identity){ 1474 /* check if perm is symmetric! */ 1475 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1476 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1477 for (i=0; i<mbs; i++) { 1478 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1479 } 1480 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1481 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1482 } 1483 1484 /* initialization */ 1485 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1486 ui[0] = 0; 1487 1488 /* jl: linked list for storing indices of the pivot rows 1489 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 1490 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1491 il = jl + mbs; 1492 cols = il + mbs; 1493 ui_ptr = (PetscInt**)(cols + mbs); 1494 for (i=0; i<mbs; i++){ 1495 jl[i] = mbs; il[i] = 0; 1496 } 1497 1498 /* create and initialize a linked list for storing column indices of the active row k */ 1499 nlnk = mbs + 1; 1500 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1501 1502 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 1503 ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 1504 current_space = free_space; 1505 1506 for (k=0; k<mbs; k++){ /* for each active row k */ 1507 /* initialize lnk by the column indices of row rip[k] of A */ 1508 nzk = 0; 1509 ncols = ai[rip[k]+1] - ai[rip[k]]; 1510 ncols_upper = 0; 1511 for (j=0; j<ncols; j++){ 1512 i = rip[*(aj + ai[rip[k]] + j)]; 1513 if (i >= k){ /* only take upper triangular entry */ 1514 cols[ncols_upper] = i; 1515 ncols_upper++; 1516 } 1517 } 1518 ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1519 nzk += nlnk; 1520 1521 /* update lnk by computing fill-in for each pivot row to be merged in */ 1522 prow = jl[k]; /* 1st pivot row */ 1523 1524 while (prow < k){ 1525 nextprow = jl[prow]; 1526 /* merge prow into k-th row */ 1527 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 1528 jmax = ui[prow+1]; 1529 ncols = jmax-jmin; 1530 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 1531 ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1532 nzk += nlnk; 1533 1534 /* update il and jl for prow */ 1535 if (jmin < jmax){ 1536 il[prow] = jmin; 1537 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1538 } 1539 prow = nextprow; 1540 } 1541 1542 /* if free space is not available, make more free space */ 1543 if (current_space->local_remaining<nzk) { 1544 i = mbs - k + 1; /* num of unfactored rows */ 1545 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1546 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1547 reallocs++; 1548 } 1549 1550 /* copy data into free space, then initialize lnk */ 1551 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1552 1553 /* add the k-th row into il and jl */ 1554 if (nzk-1 > 0){ 1555 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1556 jl[k] = jl[i]; jl[i] = k; 1557 il[k] = ui[k] + 1; 1558 } 1559 ui_ptr[k] = current_space->array; 1560 current_space->array += nzk; 1561 current_space->local_used += nzk; 1562 current_space->local_remaining -= nzk; 1563 1564 ui[k+1] = ui[k] + nzk; 1565 } 1566 1567 if (ai[mbs] != 0) { 1568 PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]); 1569 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1570 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1571 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1572 } else { 1573 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1574 } 1575 1576 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1577 ierr = PetscFree(jl);CHKERRQ(ierr); 1578 1579 /* destroy list of free space and other temporary array(s) */ 1580 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1581 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1582 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1583 1584 /* put together the new matrix in MATSEQSBAIJ format */ 1585 ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr); 1586 B = *fact; 1587 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1588 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1589 1590 b = (Mat_SeqSBAIJ*)B->data; 1591 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1592 b->singlemalloc = PETSC_FALSE; 1593 /* the next line frees the default space generated by the Create() */ 1594 ierr = PetscFree(b->a);CHKERRQ(ierr); 1595 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1596 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1597 b->j = uj; 1598 b->i = ui; 1599 b->diag = 0; 1600 b->ilen = 0; 1601 b->imax = 0; 1602 b->row = perm; 1603 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1604 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1605 b->icol = perm; 1606 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1607 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1608 PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 1609 b->maxnz = b->nz = ui[mbs]; 1610 1611 B->factor = FACTOR_CHOLESKY; 1612 B->info.factor_mallocs = reallocs; 1613 B->info.fill_ratio_given = fill; 1614 if (ai[mbs] != 0) { 1615 B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 1616 } else { 1617 B->info.fill_ratio_needed = 0.0; 1618 } 1619 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1620 if (perm_identity){ 1621 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1622 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1623 } 1624 PetscFunctionReturn(0); 1625 } 1626