1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 9 { 10 PetscFunctionBegin; 11 12 SETERRQ(PETSC_ERR_SUP,"Code not written"); 13 #if !defined(PETSC_USE_DEBUG) 14 PetscFunctionReturn(0); 15 #endif 16 } 17 18 19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 21 22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 28 /* ------------------------------------------------------------ 29 30 This interface was contribed by Tony Caola 31 32 This routine is an interface to the pivoting drop-tolerance 33 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 34 SPARSEKIT2. 35 36 The SPARSEKIT2 routines used here are covered by the GNU 37 copyright; see the file gnu in this directory. 38 39 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 40 help in getting this routine ironed out. 41 42 The major drawback to this routine is that if info->fill is 43 not large enough it fails rather than allocating more space; 44 this can be fixed by hacking/improving the f2c version of 45 Yousef Saad's code. 46 47 ------------------------------------------------------------ 48 */ 49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 50 { 51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 52 PetscFunctionBegin; 53 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 54 You can obtain the drop tolerance routines by installing PETSc from\n\ 55 www.mcs.anl.gov/petsc\n"); 56 #else 57 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 58 IS iscolf,isicol,isirow; 59 PetscTruth reorder; 60 PetscErrorCode ierr,sierr; 61 PetscInt *c,*r,*ic,i,n = A->m; 62 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63 PetscInt *ordcol,*iwk,*iperm,*jw; 64 PetscInt jmax,lfill,job,*o_i,*o_j; 65 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66 PetscReal af; 67 68 PetscFunctionBegin; 69 70 if (info->dt == PETSC_DEFAULT) info->dt = .005; 71 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 72 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 73 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 74 lfill = (PetscInt)(info->dtcount/2.0); 75 jmax = (PetscInt)(info->fill*a->nz); 76 77 78 /* ------------------------------------------------------------ 79 If reorder=.TRUE., then the original matrix has to be 80 reordered to reflect the user selected ordering scheme, and 81 then de-reordered so it is in it's original format. 82 Because Saad's dperm() is NOT in place, we have to copy 83 the original matrix and allocate more storage. . . 84 ------------------------------------------------------------ 85 */ 86 87 /* set reorder to true if either isrow or iscol is not identity */ 88 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 89 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 90 reorder = PetscNot(reorder); 91 92 93 /* storage for ilu factor */ 94 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 95 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 97 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 for (i=old_i[0];i<old_i[n];i++) { 104 old_j[i]++; 105 } 106 for(i=0;i<n+1;i++) { 107 old_i[i]++; 108 }; 109 110 111 if (reorder) { 112 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 113 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 114 for(i=0;i<n;i++) { 115 r[i] = r[i]+1; 116 c[i] = c[i]+1; 117 } 118 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 119 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 121 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 122 for (i=0;i<n;i++) { 123 r[i] = r[i]-1; 124 c[i] = c[i]-1; 125 } 126 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 127 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 128 o_a = old_a2; 129 o_j = old_j2; 130 o_i = old_i2; 131 } else { 132 o_a = old_a; 133 o_j = old_j; 134 o_i = old_i; 135 } 136 137 /* ------------------------------------------------------------ 138 Call Saad's ilutp() routine to generate the factorization 139 ------------------------------------------------------------ 140 */ 141 142 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 144 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 145 146 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 147 if (sierr) { 148 switch (sierr) { 149 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 150 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 152 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 153 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 154 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 155 } 156 } 157 158 ierr = PetscFree(w);CHKERRQ(ierr); 159 ierr = PetscFree(jw);CHKERRQ(ierr); 160 161 /* ------------------------------------------------------------ 162 Saad's routine gives the result in Modified Sparse Row (msr) 163 Convert to Compressed Sparse Row format (csr) 164 ------------------------------------------------------------ 165 */ 166 167 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 168 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 169 170 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 171 172 ierr = PetscFree(iwk);CHKERRQ(ierr); 173 ierr = PetscFree(wk);CHKERRQ(ierr); 174 175 if (reorder) { 176 ierr = PetscFree(old_a2);CHKERRQ(ierr); 177 ierr = PetscFree(old_j2);CHKERRQ(ierr); 178 ierr = PetscFree(old_i2);CHKERRQ(ierr); 179 } else { 180 /* fix permutation of old_j that the factorization introduced */ 181 for (i=old_i[0]; i<old_i[n]; i++) { 182 old_j[i-1] = iperm[old_j[i-1]-1]; 183 } 184 } 185 186 /* get rid of the shift to indices starting at 1 */ 187 for (i=0; i<n+1; i++) { 188 old_i[i]--; 189 } 190 for (i=old_i[0];i<old_i[n];i++) { 191 old_j[i]--; 192 } 193 194 /* Make the factored matrix 0-based */ 195 for (i=0; i<n+1; i++) { 196 new_i[i]--; 197 } 198 for (i=new_i[0];i<new_i[n];i++) { 199 new_j[i]--; 200 } 201 202 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 203 /*-- permute the right-hand-side and solution vectors --*/ 204 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 205 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 206 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 207 for(i=0; i<n; i++) { 208 ordcol[i] = ic[iperm[i]-1]; 209 }; 210 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 211 ierr = ISDestroy(isicol);CHKERRQ(ierr); 212 213 ierr = PetscFree(iperm);CHKERRQ(ierr); 214 215 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 216 ierr = PetscFree(ordcol);CHKERRQ(ierr); 217 218 /*----- put together the new matrix -----*/ 219 220 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 221 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 222 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 223 (*fact)->factor = FACTOR_LU; 224 (*fact)->assembled = PETSC_TRUE; 225 226 b = (Mat_SeqAIJ*)(*fact)->data; 227 ierr = PetscFree(b->imax);CHKERRQ(ierr); 228 b->sorted = PETSC_FALSE; 229 b->singlemalloc = PETSC_FALSE; 230 /* the next line frees the default space generated by the MatCreate() */ 231 ierr = PetscFree(b->a);CHKERRQ(ierr); 232 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 247 248 /* check out for identical nodes. If found, use inode functions */ 249 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 250 251 af = ((double)b->nz)/((double)a->nz) + .001; 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 /* 262 Factorization code for AIJ format. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 267 { 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 269 IS isicol; 270 PetscErrorCode ierr; 271 PetscInt *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j; 272 PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 273 PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im; 274 PetscReal f; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 284 ainew[0] = 0; 285 /* don't know how many column pointers are needed so estimate */ 286 f = info->fill; 287 jmax = (PetscInt)(f*ai[n]+1); 288 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 289 /* fill is a linked list of nonzeros in active row */ 290 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 291 im = fill + n + 1; 292 /* idnew is location of diagonal in factor */ 293 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr); 294 idnew[0] = 0; 295 296 for (i=0; i<n; i++) { 297 /* first copy previous fill into linked list */ 298 nnz = nz = ai[r[i]+1] - ai[r[i]]; 299 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 300 ajtmp = aj + ai[r[i]]; 301 fill[n] = n; 302 while (nz--) { 303 fm = n; 304 idx = ic[*ajtmp++]; 305 do { 306 m = fm; 307 fm = fill[m]; 308 } while (fm < idx); 309 fill[m] = idx; 310 fill[idx] = fm; 311 } 312 row = fill[n]; 313 while (row < i) { 314 ajtmp = ajnew + idnew[row] + 1; 315 nzbd = 1 + idnew[row] - ainew[row]; 316 nz = im[row] - nzbd; 317 fm = row; 318 while (nz-- > 0) { 319 idx = *ajtmp++ ; 320 nzbd++; 321 if (idx == i) im[row] = nzbd; 322 do { 323 m = fm; 324 fm = fill[m]; 325 } while (fm < idx); 326 if (fm != idx) { 327 fill[m] = idx; 328 fill[idx] = fm; 329 fm = idx; 330 nnz++; 331 } 332 } 333 row = fill[row]; 334 } 335 /* copy new filled row into permanent storage */ 336 ainew[i+1] = ainew[i] + nnz; 337 if (ainew[i+1] > jmax) { 338 339 /* estimate how much additional space we will need */ 340 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 341 /* just double the memory each time */ 342 PetscInt maxadd = jmax; 343 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 344 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 345 jmax += maxadd; 346 347 /* allocate a longer ajnew */ 348 ierr = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 349 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr); 350 ierr = PetscFree(ajnew);CHKERRQ(ierr); 351 ajnew = ajtmp; 352 reallocs++; /* count how many times we realloc */ 353 } 354 ajtmp = ajnew + ainew[i]; 355 fm = fill[n]; 356 nzi = 0; 357 im[i] = nnz; 358 while (nnz--) { 359 if (fm < i) nzi++; 360 *ajtmp++ = fm ; 361 fm = fill[fm]; 362 } 363 idnew[i] = ainew[i] + nzi; 364 } 365 if (ai[n] != 0) { 366 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 370 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 371 } else { 372 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 373 } 374 375 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 376 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 377 378 ierr = PetscFree(fill);CHKERRQ(ierr); 379 380 /* put together the new matrix */ 381 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 382 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 383 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 384 PetscLogObjectParent(*B,isicol); 385 b = (Mat_SeqAIJ*)(*B)->data; 386 ierr = PetscFree(b->imax);CHKERRQ(ierr); 387 b->singlemalloc = PETSC_FALSE; 388 /* the next line frees the default space generated by the Create() */ 389 ierr = PetscFree(b->a);CHKERRQ(ierr); 390 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 391 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 392 b->j = ajnew; 393 b->i = ainew; 394 b->diag = idnew; 395 b->ilen = 0; 396 b->imax = 0; 397 b->row = isrow; 398 b->col = iscol; 399 b->lu_damping = info->damping; 400 b->lu_zeropivot = info->zeropivot; 401 b->lu_shift = info->shift; 402 b->lu_shift_fraction = info->shift_fraction; 403 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 404 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 405 b->icol = isicol; 406 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 407 /* In b structure: Free imax, ilen, old a, old j. 408 Allocate idnew, solve_work, new a, new j */ 409 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar))); 410 b->maxnz = b->nz = ainew[n] ; 411 412 (*B)->factor = FACTOR_LU; 413 (*B)->info.factor_mallocs = reallocs; 414 (*B)->info.fill_ratio_given = f; 415 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 416 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 417 418 if (ai[n] != 0) { 419 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 420 } else { 421 (*B)->info.fill_ratio_needed = 0.0; 422 } 423 PetscFunctionReturn(0); 424 } 425 /* ----------------------------------------------------------- */ 426 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 430 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 431 { 432 Mat C=*B; 433 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 434 IS isrow = b->row,isicol = b->icol; 435 PetscErrorCode ierr; 436 PetscInt *r,*ic,i,j,n=A->m,*ai=b->i,*aj=b->j; 437 PetscInt *ajtmpold,*ajtmp,nz,row,*ics; 438 PetscInt *diag_offset = b->diag,diag,*pj,nshift=0; 439 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 440 PetscReal zeropivot=b->lu_zeropivot,rs,d; 441 PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0.,shift_hi=1.,shift_top=0.; 442 PetscTruth lushift; 443 PetscReal shift_pd=b->lu_shift,shift_nonzero=b->lu_damping; 444 445 PetscFunctionBegin; 446 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 447 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 448 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 449 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 450 rtmps = rtmp; ics = ic; 451 452 if (!a->diag) { 453 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 454 } 455 /* if both shift schemes are chosen by user, only use shift_pd */ 456 if (shift_pd && shift_nonzero) shift_nonzero = 0.0; 457 if (shift_pd) { /* set shift_top=max{row_shift} */ 458 PetscInt *aai = a->i,*ddiag = a->diag; 459 shift_top = 0; 460 for (i=0; i<n; i++) { 461 d = PetscAbsScalar((a->a)[ddiag[i]]); 462 /* calculate amt of shift needed for this row */ 463 if (d<=0) { 464 row_shift = 0; 465 } else { 466 row_shift = -2*d; 467 } 468 v = a->a+aai[i]; 469 nz = aai[i+1] - aai[i]; 470 for (j=0; j<nz; j++) 471 row_shift += PetscAbsScalar(v[j]); 472 if (row_shift>shift_top) shift_top = row_shift; 473 } 474 } 475 476 shift_fraction = b->lu_shift_fraction; 477 shift_amount = 0; 478 do { 479 lushift = PETSC_FALSE; 480 for (i=0; i<n; i++){ 481 nz = ai[i+1] - ai[i]; 482 ajtmp = aj + ai[i]; 483 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 484 485 /* load in initial (unfactored row) */ 486 nz = a->i[r[i]+1] - a->i[r[i]]; 487 ajtmpold = a->j + a->i[r[i]]; 488 v = a->a + a->i[r[i]]; 489 for (j=0; j<nz; j++) { 490 rtmp[ics[ajtmpold[j]]] = v[j]; 491 } 492 rtmp[ics[r[i]]] += shift_amount; /* shift the diagonal of the matrix */ 493 494 row = *ajtmp++; 495 while (row < i) { 496 pc = rtmp + row; 497 if (*pc != 0.0) { 498 pv = b->a + diag_offset[row]; 499 pj = b->j + diag_offset[row] + 1; 500 multiplier = *pc / *pv++; 501 *pc = multiplier; 502 nz = ai[row+1] - diag_offset[row] - 1; 503 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 504 PetscLogFlops(2*nz); 505 } 506 row = *ajtmp++; 507 } 508 /* finished row so stick it into b->a */ 509 pv = b->a + ai[i] ; 510 pj = b->j + ai[i] ; 511 nz = ai[i+1] - ai[i]; 512 diag = diag_offset[i] - ai[i]; 513 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 514 rs = 0.0; 515 for (j=0; j<nz; j++) { 516 pv[j] = rtmps[pj[j]]; 517 if (j != diag) rs += PetscAbsScalar(pv[j]); 518 } 519 /* shift the diagonals when zero pivot is detected */ 520 #define MAX_NSHIFT 5 521 if (PetscAbsScalar(pv[diag]) <= zeropivot*rs && shift_nonzero){ 522 /* force |diag(*B)| > zeropivot*rs */ 523 if (!nshift){ 524 shift_amount = shift_nonzero; 525 } else { 526 shift_amount *= 2.0; 527 } 528 lushift = PETSC_TRUE; 529 nshift++; 530 break; 531 } else if (PetscRealPart(pv[diag]) <= zeropivot*rs && shift_pd){ 532 /* force *B to be diagonally dominant */ 533 if (nshift>MAX_NSHIFT) { 534 SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner"); 535 } else if (nshift==MAX_NSHIFT) { 536 shift_fraction = shift_hi; 537 lushift = PETSC_FALSE; 538 } else { 539 shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 540 lushift = PETSC_TRUE; 541 } 542 shift_amount = shift_fraction * shift_top; 543 nshift++; 544 break; 545 } else if (PetscAbsScalar(pv[diag]) <= zeropivot*rs){ 546 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 547 } 548 } 549 if (shift_pd && !lushift && shift_fraction>0 && nshift<MAX_NSHIFT) { 550 /* 551 * if no shift in this attempt & shifting & started shifting & can refine, 552 * then try lower shift 553 */ 554 shift_hi = shift_fraction; 555 shift_fraction = (shift_hi+shift_lo)/2.; 556 shift_amount = shift_fraction * shift_top; 557 lushift = PETSC_TRUE; 558 nshift++; 559 } 560 } while (lushift); 561 562 /* invert diagonal entries for simplier triangular solves */ 563 for (i=0; i<n; i++) { 564 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 565 } 566 567 ierr = PetscFree(rtmp);CHKERRQ(ierr); 568 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 569 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 570 C->factor = FACTOR_LU; 571 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 572 C->assembled = PETSC_TRUE; 573 PetscLogFlops(C->n); 574 if (nshift){ 575 if (shift_nonzero) { 576 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nonzero tries %D, shift_ amount%g\n",nshift,shift_amount); 577 } else if (shift_pd) { 578 b->lu_shift_fraction = shift_fraction; 579 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift); 580 } 581 } 582 PetscFunctionReturn(0); 583 } 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 587 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 588 { 589 PetscFunctionBegin; 590 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 591 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 592 PetscFunctionReturn(0); 593 } 594 595 596 /* ----------------------------------------------------------- */ 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatLUFactor_SeqAIJ" 599 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 600 { 601 PetscErrorCode ierr; 602 Mat C; 603 604 PetscFunctionBegin; 605 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 606 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 607 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 608 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 609 PetscFunctionReturn(0); 610 } 611 /* ----------------------------------------------------------- */ 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatSolve_SeqAIJ" 614 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 615 { 616 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 617 IS iscol = a->col,isrow = a->row; 618 PetscErrorCode ierr; 619 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 620 PetscInt nz,*rout,*cout; 621 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 622 623 PetscFunctionBegin; 624 if (!n) PetscFunctionReturn(0); 625 626 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 627 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 628 tmp = a->solve_work; 629 630 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 631 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 632 633 /* forward solve the lower triangular */ 634 tmp[0] = b[*r++]; 635 tmps = tmp; 636 for (i=1; i<n; i++) { 637 v = aa + ai[i] ; 638 vi = aj + ai[i] ; 639 nz = a->diag[i] - ai[i]; 640 sum = b[*r++]; 641 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 642 tmp[i] = sum; 643 } 644 645 /* backward solve the upper triangular */ 646 for (i=n-1; i>=0; i--){ 647 v = aa + a->diag[i] + 1; 648 vi = aj + a->diag[i] + 1; 649 nz = ai[i+1] - a->diag[i] - 1; 650 sum = tmp[i]; 651 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 652 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 653 } 654 655 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 656 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 657 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 658 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 659 PetscLogFlops(2*a->nz - A->n); 660 PetscFunctionReturn(0); 661 } 662 663 /* ----------------------------------------------------------- */ 664 #undef __FUNCT__ 665 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 666 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 667 { 668 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 669 PetscErrorCode ierr; 670 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 671 PetscScalar *x,*b,*aa = a->a; 672 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 673 PetscInt adiag_i,i,*vi,nz,ai_i; 674 PetscScalar *v,sum; 675 #endif 676 677 PetscFunctionBegin; 678 if (!n) PetscFunctionReturn(0); 679 680 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 681 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 682 683 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 684 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 685 #else 686 /* forward solve the lower triangular */ 687 x[0] = b[0]; 688 for (i=1; i<n; i++) { 689 ai_i = ai[i]; 690 v = aa + ai_i; 691 vi = aj + ai_i; 692 nz = adiag[i] - ai_i; 693 sum = b[i]; 694 while (nz--) sum -= *v++ * x[*vi++]; 695 x[i] = sum; 696 } 697 698 /* backward solve the upper triangular */ 699 for (i=n-1; i>=0; i--){ 700 adiag_i = adiag[i]; 701 v = aa + adiag_i + 1; 702 vi = aj + adiag_i + 1; 703 nz = ai[i+1] - adiag_i - 1; 704 sum = x[i]; 705 while (nz--) sum -= *v++ * x[*vi++]; 706 x[i] = sum*aa[adiag_i]; 707 } 708 #endif 709 PetscLogFlops(2*a->nz - A->n); 710 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 711 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 717 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 718 { 719 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 720 IS iscol = a->col,isrow = a->row; 721 PetscErrorCode ierr; 722 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 723 PetscInt nz,*rout,*cout; 724 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 725 726 PetscFunctionBegin; 727 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 728 729 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 730 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 731 tmp = a->solve_work; 732 733 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 734 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 735 736 /* forward solve the lower triangular */ 737 tmp[0] = b[*r++]; 738 for (i=1; i<n; i++) { 739 v = aa + ai[i] ; 740 vi = aj + ai[i] ; 741 nz = a->diag[i] - ai[i]; 742 sum = b[*r++]; 743 while (nz--) sum -= *v++ * tmp[*vi++ ]; 744 tmp[i] = sum; 745 } 746 747 /* backward solve the upper triangular */ 748 for (i=n-1; i>=0; i--){ 749 v = aa + a->diag[i] + 1; 750 vi = aj + a->diag[i] + 1; 751 nz = ai[i+1] - a->diag[i] - 1; 752 sum = tmp[i]; 753 while (nz--) sum -= *v++ * tmp[*vi++ ]; 754 tmp[i] = sum*aa[a->diag[i]]; 755 x[*c--] += tmp[i]; 756 } 757 758 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 759 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 760 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 761 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 762 PetscLogFlops(2*a->nz); 763 764 PetscFunctionReturn(0); 765 } 766 /* -------------------------------------------------------------------*/ 767 #undef __FUNCT__ 768 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 769 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 770 { 771 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 772 IS iscol = a->col,isrow = a->row; 773 PetscErrorCode ierr; 774 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 775 PetscInt nz,*rout,*cout,*diag = a->diag; 776 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 777 778 PetscFunctionBegin; 779 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 780 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 781 tmp = a->solve_work; 782 783 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 784 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 785 786 /* copy the b into temp work space according to permutation */ 787 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 788 789 /* forward solve the U^T */ 790 for (i=0; i<n; i++) { 791 v = aa + diag[i] ; 792 vi = aj + diag[i] + 1; 793 nz = ai[i+1] - diag[i] - 1; 794 s1 = tmp[i]; 795 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 796 while (nz--) { 797 tmp[*vi++ ] -= (*v++)*s1; 798 } 799 tmp[i] = s1; 800 } 801 802 /* backward solve the L^T */ 803 for (i=n-1; i>=0; i--){ 804 v = aa + diag[i] - 1 ; 805 vi = aj + diag[i] - 1 ; 806 nz = diag[i] - ai[i]; 807 s1 = tmp[i]; 808 while (nz--) { 809 tmp[*vi-- ] -= (*v--)*s1; 810 } 811 } 812 813 /* copy tmp into x according to permutation */ 814 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 815 816 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 817 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 818 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 819 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 820 821 PetscLogFlops(2*a->nz-A->n); 822 PetscFunctionReturn(0); 823 } 824 825 #undef __FUNCT__ 826 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 827 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 828 { 829 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 830 IS iscol = a->col,isrow = a->row; 831 PetscErrorCode ierr; 832 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 833 PetscInt nz,*rout,*cout,*diag = a->diag; 834 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 835 836 PetscFunctionBegin; 837 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 838 839 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 840 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 841 tmp = a->solve_work; 842 843 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 844 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 845 846 /* copy the b into temp work space according to permutation */ 847 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 848 849 /* forward solve the U^T */ 850 for (i=0; i<n; i++) { 851 v = aa + diag[i] ; 852 vi = aj + diag[i] + 1; 853 nz = ai[i+1] - diag[i] - 1; 854 tmp[i] *= *v++; 855 while (nz--) { 856 tmp[*vi++ ] -= (*v++)*tmp[i]; 857 } 858 } 859 860 /* backward solve the L^T */ 861 for (i=n-1; i>=0; i--){ 862 v = aa + diag[i] - 1 ; 863 vi = aj + diag[i] - 1 ; 864 nz = diag[i] - ai[i]; 865 while (nz--) { 866 tmp[*vi-- ] -= (*v--)*tmp[i]; 867 } 868 } 869 870 /* copy tmp into x according to permutation */ 871 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 872 873 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 874 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 875 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 876 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 877 878 PetscLogFlops(2*a->nz); 879 PetscFunctionReturn(0); 880 } 881 /* ----------------------------------------------------------------*/ 882 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 883 884 #undef __FUNCT__ 885 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 886 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 887 { 888 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 889 IS isicol; 890 PetscErrorCode ierr; 891 PetscInt *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j; 892 PetscInt *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 893 PetscInt *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0; 894 PetscInt incrlev,nnz,i,levels,diagonal_fill; 895 PetscTruth col_identity,row_identity; 896 PetscReal f; 897 898 PetscFunctionBegin; 899 f = info->fill; 900 levels = (PetscInt)info->levels; 901 diagonal_fill = (PetscInt)info->diagonal_fill; 902 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 903 904 /* special case that simply copies fill pattern */ 905 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 906 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 907 if (!levels && row_identity && col_identity) { 908 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 909 (*fact)->factor = FACTOR_LU; 910 b = (Mat_SeqAIJ*)(*fact)->data; 911 if (!b->diag) { 912 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 913 } 914 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 915 b->row = isrow; 916 b->col = iscol; 917 b->icol = isicol; 918 b->lu_damping = info->damping; 919 b->lu_zeropivot = info->zeropivot; 920 b->lu_shift = info->shift; 921 b->lu_shift_fraction= info->shift_fraction; 922 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 923 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 924 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 925 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 926 PetscFunctionReturn(0); 927 } 928 929 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 930 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 931 932 /* get new row pointers */ 933 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 934 ainew[0] = 0; 935 /* don't know how many column pointers are needed so estimate */ 936 jmax = (PetscInt)(f*(ai[n]+1)); 937 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 938 /* ajfill is level of fill for each fill entry */ 939 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr); 940 /* fill is a linked list of nonzeros in active row */ 941 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 942 /* im is level for each filled value */ 943 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 944 /* dloc is location of diagonal in factor */ 945 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr); 946 dloc[0] = 0; 947 for (prow=0; prow<n; prow++) { 948 949 /* copy current row into linked list */ 950 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 951 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 952 xi = aj + ai[r[prow]]; 953 fill[n] = n; 954 fill[prow] = -1; /* marker to indicate if diagonal exists */ 955 while (nz--) { 956 fm = n; 957 idx = ic[*xi++ ]; 958 do { 959 m = fm; 960 fm = fill[m]; 961 } while (fm < idx); 962 fill[m] = idx; 963 fill[idx] = fm; 964 im[idx] = 0; 965 } 966 967 /* make sure diagonal entry is included */ 968 if (diagonal_fill && fill[prow] == -1) { 969 fm = n; 970 while (fill[fm] < prow) fm = fill[fm]; 971 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 972 fill[fm] = prow; 973 im[prow] = 0; 974 nzf++; 975 dcount++; 976 } 977 978 nzi = 0; 979 row = fill[n]; 980 while (row < prow) { 981 incrlev = im[row] + 1; 982 nz = dloc[row]; 983 xi = ajnew + ainew[row] + nz + 1; 984 flev = ajfill + ainew[row] + nz + 1; 985 nnz = ainew[row+1] - ainew[row] - nz - 1; 986 fm = row; 987 while (nnz-- > 0) { 988 idx = *xi++ ; 989 if (*flev + incrlev > levels) { 990 flev++; 991 continue; 992 } 993 do { 994 m = fm; 995 fm = fill[m]; 996 } while (fm < idx); 997 if (fm != idx) { 998 im[idx] = *flev + incrlev; 999 fill[m] = idx; 1000 fill[idx] = fm; 1001 fm = idx; 1002 nzf++; 1003 } else { 1004 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 1005 } 1006 flev++; 1007 } 1008 row = fill[row]; 1009 nzi++; 1010 } 1011 /* copy new filled row into permanent storage */ 1012 ainew[prow+1] = ainew[prow] + nzf; 1013 if (ainew[prow+1] > jmax) { 1014 1015 /* estimate how much additional space we will need */ 1016 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1017 /* just double the memory each time */ 1018 /* maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1019 PetscInt maxadd = jmax; 1020 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1021 jmax += maxadd; 1022 1023 /* allocate a longer ajnew and ajfill */ 1024 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1025 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1026 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1027 ajnew = xi; 1028 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1029 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1030 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1031 ajfill = xi; 1032 reallocs++; /* count how many times we realloc */ 1033 } 1034 xi = ajnew + ainew[prow] ; 1035 flev = ajfill + ainew[prow] ; 1036 dloc[prow] = nzi; 1037 fm = fill[n]; 1038 while (nzf--) { 1039 *xi++ = fm ; 1040 *flev++ = im[fm]; 1041 fm = fill[fm]; 1042 } 1043 /* make sure row has diagonal entry */ 1044 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1045 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1046 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1047 } 1048 } 1049 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1050 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1051 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1052 ierr = PetscFree(fill);CHKERRQ(ierr); 1053 ierr = PetscFree(im);CHKERRQ(ierr); 1054 1055 { 1056 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1057 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1058 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1059 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1060 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1061 if (diagonal_fill) { 1062 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1063 } 1064 } 1065 1066 /* put together the new matrix */ 1067 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1068 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1069 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1070 PetscLogObjectParent(*fact,isicol); 1071 b = (Mat_SeqAIJ*)(*fact)->data; 1072 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1073 b->singlemalloc = PETSC_FALSE; 1074 len = (ainew[n] )*sizeof(PetscScalar); 1075 /* the next line frees the default space generated by the Create() */ 1076 ierr = PetscFree(b->a);CHKERRQ(ierr); 1077 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1078 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1079 b->j = ajnew; 1080 b->i = ainew; 1081 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1082 b->diag = dloc; 1083 b->ilen = 0; 1084 b->imax = 0; 1085 b->row = isrow; 1086 b->col = iscol; 1087 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1088 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1089 b->icol = isicol; 1090 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1091 /* In b structure: Free imax, ilen, old a, old j. 1092 Allocate dloc, solve_work, new a, new j */ 1093 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar))); 1094 b->maxnz = b->nz = ainew[n] ; 1095 b->lu_damping = info->damping; 1096 b->lu_shift = info->shift; 1097 b->lu_shift_fraction = info->shift_fraction; 1098 b->lu_zeropivot = info->zeropivot; 1099 (*fact)->factor = FACTOR_LU; 1100 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1101 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1102 1103 (*fact)->info.factor_mallocs = reallocs; 1104 (*fact)->info.fill_ratio_given = f; 1105 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #include "src/mat/impls/sbaij/seq/sbaij.h" 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1112 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *B) 1113 { 1114 Mat C = *B; 1115 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1116 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1117 IS ip=b->row; 1118 PetscErrorCode ierr; 1119 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1120 PetscInt *ai=a->i,*aj=a->j; 1121 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1122 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1123 PetscReal damping=b->factor_damping,zeropivot=b->factor_zeropivot,shift_amount; 1124 PetscTruth damp,chshift; 1125 PetscInt nshift=0,ndamp=0; 1126 1127 PetscFunctionBegin; 1128 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1129 1130 /* initialization */ 1131 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1132 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1133 jl = il + mbs; 1134 rtmp = (MatScalar*)(jl + mbs); 1135 1136 shift_amount = 0; 1137 do { 1138 damp = PETSC_FALSE; 1139 chshift = PETSC_FALSE; 1140 1141 for (i=0; i<mbs; i++) { 1142 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1143 } 1144 1145 for (k = 0; k<mbs; k++){ 1146 bval = ba + bi[k]; 1147 /* initialize k-th row by the perm[k]-th row of A */ 1148 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1149 for (j = jmin; j < jmax; j++){ 1150 col = rip[aj[j]]; 1151 if (col >= k){ /* only take upper triangular entry */ 1152 rtmp[col] = aa[j]; 1153 *bval++ = 0.0; /* for in-place factorization */ 1154 } 1155 } 1156 /* damp the diagonal of the matrix */ 1157 if (ndamp||nshift) rtmp[k] += damping+shift_amount; 1158 1159 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1160 dk = rtmp[k]; 1161 i = jl[k]; /* first row to be added to k_th row */ 1162 1163 while (i < k){ 1164 nexti = jl[i]; /* next row to be added to k_th row */ 1165 1166 /* compute multiplier, update diag(k) and U(i,k) */ 1167 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1168 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1169 dk += uikdi*ba[ili]; 1170 ba[ili] = uikdi; /* -U(i,k) */ 1171 1172 /* add multiple of row i to k-th row */ 1173 jmin = ili + 1; jmax = bi[i+1]; 1174 if (jmin < jmax){ 1175 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1176 /* update il and jl for row i */ 1177 il[i] = jmin; 1178 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1179 } 1180 i = nexti; 1181 } 1182 1183 if (PetscRealPart(dk) < zeropivot && b->factor_shift){ 1184 /* calculate a shift that would make this row diagonally dominant */ 1185 PetscReal rs = PetscAbs(PetscRealPart(dk)); 1186 jmin = bi[k]+1; 1187 nz = bi[k+1] - jmin; 1188 if (nz){ 1189 bcol = bj + jmin; 1190 bval = ba + jmin; 1191 while (nz--){ 1192 rs += PetscAbsScalar(rtmp[*bcol++]); 1193 } 1194 } 1195 /* if this shift is less than the previous, just up the previous 1196 one by a bit */ 1197 shift_amount = PetscMax(rs,1.1*shift_amount); 1198 chshift = PETSC_TRUE; 1199 /* Unlike in the ILU case there is no exit condition on nshift: 1200 we increase the shift until it converges. There is no guarantee that 1201 this algorithm converges faster or slower, or is better or worse 1202 than the ILU algorithm. */ 1203 nshift++; 1204 break; 1205 } 1206 if (PetscRealPart(dk) < zeropivot){ 1207 if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1); 1208 if (damping > 0.0) { 1209 if (ndamp) damping *= 2.0; 1210 damp = PETSC_TRUE; 1211 ndamp++; 1212 break; 1213 } else if (PetscAbsScalar(dk) < zeropivot){ 1214 SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot); 1215 } else { 1216 PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k); 1217 } 1218 } 1219 1220 /* copy data into U(k,:) */ 1221 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1222 jmin = bi[k]+1; jmax = bi[k+1]; 1223 if (jmin < jmax) { 1224 for (j=jmin; j<jmax; j++){ 1225 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1226 } 1227 /* add the k-th row into il and jl */ 1228 il[k] = jmin; 1229 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1230 } 1231 } 1232 } while (damp||chshift); 1233 ierr = PetscFree(il);CHKERRQ(ierr); 1234 1235 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1236 C->factor = FACTOR_CHOLESKY; 1237 C->assembled = PETSC_TRUE; 1238 C->preallocated = PETSC_TRUE; 1239 PetscLogFlops(C->m); 1240 if (ndamp) { 1241 PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqAIJ: number of damping tries %D damping value %g\n",ndamp,damping); 1242 } 1243 if (nshift) { 1244 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ diagonal shifted %D shifts\n",nshift); 1245 } 1246 PetscFunctionReturn(0); 1247 } 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering" 1250 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering(Mat A,Mat *fact) 1251 { 1252 Mat C=*fact; 1253 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1254 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1255 PetscErrorCode ierr; 1256 PetscInt i,j,am=A->m; 1257 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 1258 PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz; 1259 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 1260 PetscReal zeropivot=b->factor_zeropivot,shift_amount,rs; 1261 PetscTruth chshift; 1262 PetscInt nshift=0; 1263 PetscReal shift_pd=b->factor_shift,shift_nonzero=b->factor_damping; 1264 1265 PetscFunctionBegin; 1266 /* initialization */ 1267 nz = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar); 1268 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1269 jl = il + am; 1270 rtmp = (MatScalar*)(jl + am); 1271 1272 shift_amount = 0; 1273 do { 1274 chshift = PETSC_FALSE; 1275 for (i=0; i<am; i++) { 1276 rtmp[i] = 0.0; jl[i] = am; il[0] = 0; 1277 } 1278 1279 for (k = 0; k<am; k++){ 1280 /* initialize k-th row with elements nonzero in row perm(k) of A */ 1281 nz = ai[k+1] - ai[k]; 1282 acol = aj + ai[k]; 1283 aval = aa + ai[k]; 1284 bval = ba + bi[k]; 1285 while (nz -- ){ 1286 if (*acol < k) { /* skip lower triangular entries */ 1287 acol++; aval++; 1288 } else { 1289 rtmp[*acol++] = *aval++; 1290 *bval++ = 0.0; /* for in-place factorization */ 1291 } 1292 } 1293 /* shift the diagonal of the matrix */ 1294 if (nshift) rtmp[k] += shift_amount; 1295 1296 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1297 dk = rtmp[k]; 1298 i = jl[k]; /* first row to be added to k_th row */ 1299 1300 while (i < k){ 1301 nexti = jl[i]; /* next row to be added to k_th row */ 1302 /* compute multiplier, update D(k) and U(i,k) */ 1303 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1304 uikdi = - ba[ili]*ba[bi[i]]; 1305 dk += uikdi*ba[ili]; 1306 ba[ili] = uikdi; /* -U(i,k) */ 1307 1308 /* add multiple of row i to k-th row ... */ 1309 jmin = ili + 1; 1310 nz = bi[i+1] - jmin; 1311 if (nz > 0){ 1312 bcol = bj + jmin; 1313 bval = ba + jmin; 1314 while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 1315 /* update il and jl for i-th row */ 1316 il[i] = jmin; 1317 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1318 } 1319 i = nexti; 1320 } 1321 1322 /* shift the diagonals when zero pivot is detected */ 1323 /* compute rs=sum of abs(off-diagonal) */ 1324 rs = 0.0; 1325 jmin = bi[k]+1; 1326 nz = bi[k+1] - jmin; 1327 if (nz){ 1328 bcol = bj + jmin; 1329 while (nz--){ 1330 rs += PetscAbsScalar(rtmp[*bcol++]); 1331 } 1332 } 1333 if (PetscAbsScalar(dk) <= zeropivot*rs && shift_nonzero){ 1334 /* force |diag(*B)| > zeropivot*rs */ 1335 if (!nshift){ 1336 shift_amount = shift_nonzero; 1337 } else { 1338 shift_amount *= 2.0; 1339 } 1340 chshift = PETSC_TRUE; 1341 nshift++; 1342 break; 1343 } else if (PetscRealPart(dk) <= zeropivot*rs && shift_pd){ 1344 /* calculate a shift that would make this row diagonally dominant */ 1345 shift_amount = PetscMax(rs+PetscAbs(PetscRealPart(dk)),1.1*shift_amount); 1346 chshift = PETSC_TRUE; 1347 /* Unlike in the ILU case there is no exit condition on nshift: 1348 we increase the shift until it converges. There is no guarantee that 1349 this algorithm converges faster or slower, or is better or worse 1350 than the ILU algorithm. */ 1351 nshift++; 1352 break; 1353 } else if (PetscAbsScalar(dk) <= zeropivot*rs){ 1354 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1355 } 1356 1357 /* copy data into U(k,:) */ 1358 ba[bi[k]] = 1.0/dk; 1359 jmin = bi[k]+1; 1360 nz = bi[k+1] - jmin; 1361 if (nz){ 1362 bcol = bj + jmin; 1363 bval = ba + jmin; 1364 while (nz--){ 1365 *bval++ = rtmp[*bcol]; 1366 rtmp[*bcol++] = 0.0; 1367 } 1368 /* add k-th row into il and jl */ 1369 il[k] = jmin; 1370 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1371 } 1372 } 1373 } while (chshift); 1374 ierr = PetscFree(il);CHKERRQ(ierr); 1375 1376 C->factor = FACTOR_CHOLESKY; 1377 C->assembled = PETSC_TRUE; 1378 C->preallocated = PETSC_TRUE; 1379 PetscLogFlops(C->m); 1380 if (nshift){ 1381 if (shift_nonzero) { 1382 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering: number of shift_nonzero tries %D, shift_amount %g\n",nshift,shift_amount); 1383 } else if (shift_pd) { 1384 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering: number of shift_pd tries %D, shift_amount %g\n",nshift,shift_amount); 1385 } 1386 } 1387 PetscFunctionReturn(0); 1388 } 1389 1390 #include "petscbt.h" 1391 #include "src/mat/utils/freespace.h" 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1394 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1395 { 1396 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1397 Mat_SeqSBAIJ *b; 1398 Mat B; 1399 PetscErrorCode ierr; 1400 PetscTruth perm_identity; 1401 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1402 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1403 PetscInt nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1404 PetscReal fill=info->fill,levels=info->levels; 1405 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1406 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1407 PetscBT lnkbt; 1408 1409 PetscFunctionBegin; 1410 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1411 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1412 1413 /* special case that simply copies fill pattern */ 1414 if (!levels && perm_identity) { 1415 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1416 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1417 for (i=0; i<am; i++) { 1418 ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */ 1419 } 1420 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1421 B = *fact; 1422 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1423 ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr); 1424 1425 b = (Mat_SeqSBAIJ*)B->data; 1426 uj = b->j; 1427 for (i=0; i<am; i++) { 1428 aj = a->j + a->diag[i]; 1429 for (j=0; j<ui[i]; j++){ 1430 *uj++ = *aj++; 1431 } 1432 b->ilen[i] = ui[i]; 1433 } 1434 ierr = PetscFree(ui);CHKERRQ(ierr); 1435 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1436 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1437 1438 b->factor_zeropivot = info->zeropivot; 1439 b->factor_damping = info->damping; 1440 b->factor_shift = info->shift; 1441 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1442 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1443 ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 1444 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering; 1445 PetscFunctionReturn(0); 1446 } 1447 1448 /* initialization */ 1449 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1450 ui[0] = 0; 1451 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 1452 1453 /* jl: linked list for storing indices of the pivot rows 1454 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1455 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1456 il = jl + am; 1457 uj_ptr = (PetscInt**)(il + am); 1458 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1459 for (i=0; i<am; i++){ 1460 jl[i] = am; il[i] = 0; 1461 } 1462 1463 /* create and initialize a linked list for storing column indices of the active row k */ 1464 nlnk = am + 1; 1465 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1466 1467 /* initial FreeSpace size is fill*(ai[am]+1) */ 1468 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1469 current_space = free_space; 1470 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1471 current_space_lvl = free_space_lvl; 1472 1473 for (k=0; k<am; k++){ /* for each active row k */ 1474 /* initialize lnk by the column indices of row rip[k] of A */ 1475 nzk = 0; 1476 ncols = ai[rip[k]+1] - ai[rip[k]]; 1477 ncols_upper = 0; 1478 cols = cols_lvl + am; 1479 for (j=0; j<ncols; j++){ 1480 i = rip[*(aj + ai[rip[k]] + j)]; 1481 if (i >= k){ /* only take upper triangular entry */ 1482 cols[ncols_upper] = i; 1483 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 1484 ncols_upper++; 1485 } 1486 } 1487 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1488 nzk += nlnk; 1489 1490 /* update lnk by computing fill-in for each pivot row to be merged in */ 1491 prow = jl[k]; /* 1st pivot row */ 1492 1493 while (prow < k){ 1494 nextprow = jl[prow]; 1495 1496 /* merge prow into k-th row */ 1497 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1498 jmax = ui[prow+1]; 1499 ncols = jmax-jmin; 1500 i = jmin - ui[prow]; 1501 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1502 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1503 ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1504 nzk += nlnk; 1505 1506 /* update il and jl for prow */ 1507 if (jmin < jmax){ 1508 il[prow] = jmin; 1509 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1510 } 1511 prow = nextprow; 1512 } 1513 1514 /* if free space is not available, make more free space */ 1515 if (current_space->local_remaining<nzk) { 1516 i = am - k + 1; /* num of unfactored rows */ 1517 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1518 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1519 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1520 reallocs++; 1521 } 1522 1523 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1524 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1525 1526 /* add the k-th row into il and jl */ 1527 if (nzk-1 > 0){ 1528 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1529 jl[k] = jl[i]; jl[i] = k; 1530 il[k] = ui[k] + 1; 1531 } 1532 uj_ptr[k] = current_space->array; 1533 uj_lvl_ptr[k] = current_space_lvl->array; 1534 1535 current_space->array += nzk; 1536 current_space->local_used += nzk; 1537 current_space->local_remaining -= nzk; 1538 1539 current_space_lvl->array += nzk; 1540 current_space_lvl->local_used += nzk; 1541 current_space_lvl->local_remaining -= nzk; 1542 1543 ui[k+1] = ui[k] + nzk; 1544 } 1545 1546 if (ai[am] != 0) { 1547 PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]); 1548 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1549 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1550 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1551 } else { 1552 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1553 } 1554 1555 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1556 ierr = PetscFree(jl);CHKERRQ(ierr); 1557 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1558 1559 /* destroy list of free space and other temporary array(s) */ 1560 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1561 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1562 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1563 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1564 1565 /* put together the new matrix in MATSEQSBAIJ format */ 1566 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1567 B = *fact; 1568 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1569 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1570 1571 b = (Mat_SeqSBAIJ*)B->data; 1572 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1573 b->singlemalloc = PETSC_FALSE; 1574 /* the next line frees the default space generated by the Create() */ 1575 ierr = PetscFree(b->a);CHKERRQ(ierr); 1576 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1577 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1578 b->j = uj; 1579 b->i = ui; 1580 b->diag = 0; 1581 b->ilen = 0; 1582 b->imax = 0; 1583 b->row = perm; 1584 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1585 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1586 b->icol = perm; 1587 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1588 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1589 PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 1590 b->maxnz = b->nz = ui[am]; 1591 1592 B->factor = FACTOR_CHOLESKY; 1593 B->info.factor_mallocs = reallocs; 1594 B->info.fill_ratio_given = fill; 1595 if (ai[am] != 0) { 1596 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1597 } else { 1598 B->info.fill_ratio_needed = 0.0; 1599 } 1600 1601 b->factor_zeropivot = info->zeropivot; 1602 b->factor_damping = info->damping; 1603 b->factor_shift = info->shift; 1604 if (perm_identity){ 1605 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1606 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 1608 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering; 1609 } else { 1610 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1611 } 1612 PetscFunctionReturn(0); 1613 } 1614 1615 #undef __FUNCT__ 1616 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1617 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1618 { 1619 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1620 Mat_SeqSBAIJ *b; 1621 Mat B; 1622 PetscErrorCode ierr; 1623 PetscTruth perm_identity; 1624 PetscReal fill = info->fill; 1625 PetscInt *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1626 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1627 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1628 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1629 PetscBT lnkbt; 1630 IS iperm; 1631 1632 PetscFunctionBegin; 1633 /* check whether perm is the identity mapping */ 1634 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1635 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1636 1637 if (!perm_identity){ 1638 /* check if perm is symmetric! */ 1639 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1640 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1641 for (i=0; i<mbs; i++) { 1642 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1643 } 1644 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1645 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1646 } 1647 1648 /* initialization */ 1649 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1650 ui[0] = 0; 1651 1652 /* jl: linked list for storing indices of the pivot rows 1653 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 1654 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1655 il = jl + mbs; 1656 cols = il + mbs; 1657 ui_ptr = (PetscInt**)(cols + mbs); 1658 for (i=0; i<mbs; i++){ 1659 jl[i] = mbs; il[i] = 0; 1660 } 1661 1662 /* create and initialize a linked list for storing column indices of the active row k */ 1663 nlnk = mbs + 1; 1664 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1665 1666 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 1667 ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 1668 current_space = free_space; 1669 1670 for (k=0; k<mbs; k++){ /* for each active row k */ 1671 /* initialize lnk by the column indices of row rip[k] of A */ 1672 nzk = 0; 1673 ncols = ai[rip[k]+1] - ai[rip[k]]; 1674 ncols_upper = 0; 1675 for (j=0; j<ncols; j++){ 1676 i = rip[*(aj + ai[rip[k]] + j)]; 1677 if (i >= k){ /* only take upper triangular entry */ 1678 cols[ncols_upper] = i; 1679 ncols_upper++; 1680 } 1681 } 1682 ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1683 nzk += nlnk; 1684 1685 /* update lnk by computing fill-in for each pivot row to be merged in */ 1686 prow = jl[k]; /* 1st pivot row */ 1687 1688 while (prow < k){ 1689 nextprow = jl[prow]; 1690 /* merge prow into k-th row */ 1691 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 1692 jmax = ui[prow+1]; 1693 ncols = jmax-jmin; 1694 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 1695 ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1696 nzk += nlnk; 1697 1698 /* update il and jl for prow */ 1699 if (jmin < jmax){ 1700 il[prow] = jmin; 1701 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1702 } 1703 prow = nextprow; 1704 } 1705 1706 /* if free space is not available, make more free space */ 1707 if (current_space->local_remaining<nzk) { 1708 i = mbs - k + 1; /* num of unfactored rows */ 1709 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1710 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1711 reallocs++; 1712 } 1713 1714 /* copy data into free space, then initialize lnk */ 1715 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1716 1717 /* add the k-th row into il and jl */ 1718 if (nzk-1 > 0){ 1719 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1720 jl[k] = jl[i]; jl[i] = k; 1721 il[k] = ui[k] + 1; 1722 } 1723 ui_ptr[k] = current_space->array; 1724 current_space->array += nzk; 1725 current_space->local_used += nzk; 1726 current_space->local_remaining -= nzk; 1727 1728 ui[k+1] = ui[k] + nzk; 1729 } 1730 1731 if (ai[mbs] != 0) { 1732 PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]); 1733 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1734 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1735 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1736 } else { 1737 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1738 } 1739 1740 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1741 ierr = PetscFree(jl);CHKERRQ(ierr); 1742 1743 /* destroy list of free space and other temporary array(s) */ 1744 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1745 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1746 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1747 1748 /* put together the new matrix in MATSEQSBAIJ format */ 1749 ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr); 1750 B = *fact; 1751 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1752 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1753 1754 b = (Mat_SeqSBAIJ*)B->data; 1755 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1756 b->singlemalloc = PETSC_FALSE; 1757 /* the next line frees the default space generated by the Create() */ 1758 ierr = PetscFree(b->a);CHKERRQ(ierr); 1759 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1760 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1761 b->j = uj; 1762 b->i = ui; 1763 b->diag = 0; 1764 b->ilen = 0; 1765 b->imax = 0; 1766 b->row = perm; 1767 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1768 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1769 b->icol = perm; 1770 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1771 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1772 PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 1773 b->maxnz = b->nz = ui[mbs]; 1774 1775 B->factor = FACTOR_CHOLESKY; 1776 B->info.factor_mallocs = reallocs; 1777 B->info.fill_ratio_given = fill; 1778 if (ai[mbs] != 0) { 1779 B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 1780 } else { 1781 B->info.fill_ratio_needed = 0.0; 1782 } 1783 if (perm_identity){ 1784 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1785 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1786 ierr = PetscObjectComposeFunction((PetscObject)(*fact),"MatCholeskyFactorNumeric_NaturalOrdering","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);CHKERRQ(ierr); 1787 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering; 1788 } else { 1789 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1790 } 1791 PetscFunctionReturn(0); 1792 } 1793