1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 9 { 10 PetscFunctionBegin; 11 12 SETERRQ(PETSC_ERR_SUP,"Code not written"); 13 #if !defined(PETSC_USE_DEBUG) 14 PetscFunctionReturn(0); 15 #endif 16 } 17 18 19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 21 22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 28 /* ------------------------------------------------------------ 29 30 This interface was contribed by Tony Caola 31 32 This routine is an interface to the pivoting drop-tolerance 33 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 34 SPARSEKIT2. 35 36 The SPARSEKIT2 routines used here are covered by the GNU 37 copyright; see the file gnu in this directory. 38 39 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 40 help in getting this routine ironed out. 41 42 The major drawback to this routine is that if info->fill is 43 not large enough it fails rather than allocating more space; 44 this can be fixed by hacking/improving the f2c version of 45 Yousef Saad's code. 46 47 ------------------------------------------------------------ 48 */ 49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 50 { 51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 52 PetscFunctionBegin; 53 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 54 You can obtain the drop tolerance routines by installing PETSc from\n\ 55 www.mcs.anl.gov/petsc\n"); 56 #else 57 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 58 IS iscolf,isicol,isirow; 59 PetscTruth reorder; 60 PetscErrorCode ierr,sierr; 61 PetscInt *c,*r,*ic,i,n = A->m; 62 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63 PetscInt *ordcol,*iwk,*iperm,*jw; 64 PetscInt jmax,lfill,job,*o_i,*o_j; 65 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66 PetscReal af; 67 68 PetscFunctionBegin; 69 70 if (info->dt == PETSC_DEFAULT) info->dt = .005; 71 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 72 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 73 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 74 lfill = (PetscInt)(info->dtcount/2.0); 75 jmax = (PetscInt)(info->fill*a->nz); 76 77 78 /* ------------------------------------------------------------ 79 If reorder=.TRUE., then the original matrix has to be 80 reordered to reflect the user selected ordering scheme, and 81 then de-reordered so it is in it's original format. 82 Because Saad's dperm() is NOT in place, we have to copy 83 the original matrix and allocate more storage. . . 84 ------------------------------------------------------------ 85 */ 86 87 /* set reorder to true if either isrow or iscol is not identity */ 88 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 89 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 90 reorder = PetscNot(reorder); 91 92 93 /* storage for ilu factor */ 94 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 95 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 97 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 for (i=old_i[0];i<old_i[n];i++) { 104 old_j[i]++; 105 } 106 for(i=0;i<n+1;i++) { 107 old_i[i]++; 108 }; 109 110 111 if (reorder) { 112 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 113 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 114 for(i=0;i<n;i++) { 115 r[i] = r[i]+1; 116 c[i] = c[i]+1; 117 } 118 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 119 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 121 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 122 for (i=0;i<n;i++) { 123 r[i] = r[i]-1; 124 c[i] = c[i]-1; 125 } 126 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 127 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 128 o_a = old_a2; 129 o_j = old_j2; 130 o_i = old_i2; 131 } else { 132 o_a = old_a; 133 o_j = old_j; 134 o_i = old_i; 135 } 136 137 /* ------------------------------------------------------------ 138 Call Saad's ilutp() routine to generate the factorization 139 ------------------------------------------------------------ 140 */ 141 142 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 144 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 145 146 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 147 if (sierr) { 148 switch (sierr) { 149 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 150 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 152 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 153 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 154 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 155 } 156 } 157 158 ierr = PetscFree(w);CHKERRQ(ierr); 159 ierr = PetscFree(jw);CHKERRQ(ierr); 160 161 /* ------------------------------------------------------------ 162 Saad's routine gives the result in Modified Sparse Row (msr) 163 Convert to Compressed Sparse Row format (csr) 164 ------------------------------------------------------------ 165 */ 166 167 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 168 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 169 170 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 171 172 ierr = PetscFree(iwk);CHKERRQ(ierr); 173 ierr = PetscFree(wk);CHKERRQ(ierr); 174 175 if (reorder) { 176 ierr = PetscFree(old_a2);CHKERRQ(ierr); 177 ierr = PetscFree(old_j2);CHKERRQ(ierr); 178 ierr = PetscFree(old_i2);CHKERRQ(ierr); 179 } else { 180 /* fix permutation of old_j that the factorization introduced */ 181 for (i=old_i[0]; i<old_i[n]; i++) { 182 old_j[i-1] = iperm[old_j[i-1]-1]; 183 } 184 } 185 186 /* get rid of the shift to indices starting at 1 */ 187 for (i=0; i<n+1; i++) { 188 old_i[i]--; 189 } 190 for (i=old_i[0];i<old_i[n];i++) { 191 old_j[i]--; 192 } 193 194 /* Make the factored matrix 0-based */ 195 for (i=0; i<n+1; i++) { 196 new_i[i]--; 197 } 198 for (i=new_i[0];i<new_i[n];i++) { 199 new_j[i]--; 200 } 201 202 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 203 /*-- permute the right-hand-side and solution vectors --*/ 204 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 205 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 206 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 207 for(i=0; i<n; i++) { 208 ordcol[i] = ic[iperm[i]-1]; 209 }; 210 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 211 ierr = ISDestroy(isicol);CHKERRQ(ierr); 212 213 ierr = PetscFree(iperm);CHKERRQ(ierr); 214 215 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 216 ierr = PetscFree(ordcol);CHKERRQ(ierr); 217 218 /*----- put together the new matrix -----*/ 219 220 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 221 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 222 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 223 (*fact)->factor = FACTOR_LU; 224 (*fact)->assembled = PETSC_TRUE; 225 226 b = (Mat_SeqAIJ*)(*fact)->data; 227 ierr = PetscFree(b->imax);CHKERRQ(ierr); 228 b->sorted = PETSC_FALSE; 229 b->singlemalloc = PETSC_FALSE; 230 /* the next line frees the default space generated by the MatCreate() */ 231 ierr = PetscFree(b->a);CHKERRQ(ierr); 232 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 247 248 /* check out for identical nodes. If found, use inode functions */ 249 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 250 251 af = ((double)b->nz)/((double)a->nz) + .001; 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 /* 262 Factorization code for AIJ format. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 267 { 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 269 IS isicol; 270 PetscErrorCode ierr; 271 PetscInt *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j; 272 PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 273 PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im; 274 PetscReal f; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 284 ainew[0] = 0; 285 /* don't know how many column pointers are needed so estimate */ 286 f = info->fill; 287 jmax = (PetscInt)(f*ai[n]+1); 288 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 289 /* fill is a linked list of nonzeros in active row */ 290 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 291 im = fill + n + 1; 292 /* idnew is location of diagonal in factor */ 293 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr); 294 idnew[0] = 0; 295 296 for (i=0; i<n; i++) { 297 /* first copy previous fill into linked list */ 298 nnz = nz = ai[r[i]+1] - ai[r[i]]; 299 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 300 ajtmp = aj + ai[r[i]]; 301 fill[n] = n; 302 while (nz--) { 303 fm = n; 304 idx = ic[*ajtmp++]; 305 do { 306 m = fm; 307 fm = fill[m]; 308 } while (fm < idx); 309 fill[m] = idx; 310 fill[idx] = fm; 311 } 312 row = fill[n]; 313 while (row < i) { 314 ajtmp = ajnew + idnew[row] + 1; 315 nzbd = 1 + idnew[row] - ainew[row]; 316 nz = im[row] - nzbd; 317 fm = row; 318 while (nz-- > 0) { 319 idx = *ajtmp++ ; 320 nzbd++; 321 if (idx == i) im[row] = nzbd; 322 do { 323 m = fm; 324 fm = fill[m]; 325 } while (fm < idx); 326 if (fm != idx) { 327 fill[m] = idx; 328 fill[idx] = fm; 329 fm = idx; 330 nnz++; 331 } 332 } 333 row = fill[row]; 334 } 335 /* copy new filled row into permanent storage */ 336 ainew[i+1] = ainew[i] + nnz; 337 if (ainew[i+1] > jmax) { 338 339 /* estimate how much additional space we will need */ 340 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 341 /* just double the memory each time */ 342 PetscInt maxadd = jmax; 343 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 344 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 345 jmax += maxadd; 346 347 /* allocate a longer ajnew */ 348 ierr = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 349 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr); 350 ierr = PetscFree(ajnew);CHKERRQ(ierr); 351 ajnew = ajtmp; 352 reallocs++; /* count how many times we realloc */ 353 } 354 ajtmp = ajnew + ainew[i]; 355 fm = fill[n]; 356 nzi = 0; 357 im[i] = nnz; 358 while (nnz--) { 359 if (fm < i) nzi++; 360 *ajtmp++ = fm ; 361 fm = fill[fm]; 362 } 363 idnew[i] = ainew[i] + nzi; 364 } 365 if (ai[n] != 0) { 366 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 370 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 371 } else { 372 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 373 } 374 375 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 376 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 377 378 ierr = PetscFree(fill);CHKERRQ(ierr); 379 380 /* put together the new matrix */ 381 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 382 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 383 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 384 PetscLogObjectParent(*B,isicol); 385 b = (Mat_SeqAIJ*)(*B)->data; 386 ierr = PetscFree(b->imax);CHKERRQ(ierr); 387 b->singlemalloc = PETSC_FALSE; 388 /* the next line frees the default space generated by the Create() */ 389 ierr = PetscFree(b->a);CHKERRQ(ierr); 390 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 391 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 392 b->j = ajnew; 393 b->i = ainew; 394 b->diag = idnew; 395 b->ilen = 0; 396 b->imax = 0; 397 b->row = isrow; 398 b->col = iscol; 399 b->lu_damping = info->damping; 400 b->lu_zeropivot = info->zeropivot; 401 b->lu_shift = info->shift; 402 b->lu_shift_fraction = info->shift_fraction; 403 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 404 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 405 b->icol = isicol; 406 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 407 /* In b structure: Free imax, ilen, old a, old j. 408 Allocate idnew, solve_work, new a, new j */ 409 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar))); 410 b->maxnz = b->nz = ainew[n] ; 411 412 (*B)->factor = FACTOR_LU; 413 (*B)->info.factor_mallocs = reallocs; 414 (*B)->info.fill_ratio_given = f; 415 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 416 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 417 418 if (ai[n] != 0) { 419 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 420 } else { 421 (*B)->info.fill_ratio_needed = 0.0; 422 } 423 PetscFunctionReturn(0); 424 } 425 /* ----------------------------------------------------------- */ 426 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 430 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 431 { 432 Mat C=*B; 433 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 434 IS isrow = b->row,isicol = b->icol; 435 PetscErrorCode ierr; 436 PetscInt *r,*ic,i,j,n=A->m,*ai=b->i,*aj=b->j; 437 PetscInt *ajtmpold,*ajtmp,nz,row,*ics; 438 PetscInt *diag_offset = b->diag,diag,*pj,nshift=0; 439 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 440 PetscReal zeropivot=b->lu_zeropivot,rs,d; 441 PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0.,shift_hi=1.,shift_top=0.; 442 PetscTruth lushift; 443 PetscReal shift_pd=b->lu_shift,shift_nonzero=b->lu_damping; 444 445 PetscFunctionBegin; 446 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 447 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 448 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 449 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 450 rtmps = rtmp; ics = ic; 451 452 if (!a->diag) { 453 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 454 } 455 /* if both shift schemes are chosen by user, only use shift_pd */ 456 if (shift_pd && shift_nonzero) shift_nonzero = 0.0; 457 if (shift_pd) { /* set shift_top=max{row_shift} */ 458 PetscInt *aai = a->i,*ddiag = a->diag; 459 shift_top = 0; 460 for (i=0; i<n; i++) { 461 d = PetscAbsScalar((a->a)[ddiag[i]]); 462 /* calculate amt of shift needed for this row */ 463 if (d<=0) { 464 row_shift = 0; 465 } else { 466 row_shift = -2*d; 467 } 468 v = a->a+aai[i]; 469 nz = aai[i+1] - aai[i]; 470 for (j=0; j<nz; j++) 471 row_shift += PetscAbsScalar(v[j]); 472 if (row_shift>shift_top) shift_top = row_shift; 473 } 474 } 475 476 shift_fraction = 0; shift_amount = 0; 477 do { 478 lushift = PETSC_FALSE; 479 for (i=0; i<n; i++){ 480 nz = ai[i+1] - ai[i]; 481 ajtmp = aj + ai[i]; 482 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 483 484 /* load in initial (unfactored row) */ 485 nz = a->i[r[i]+1] - a->i[r[i]]; 486 ajtmpold = a->j + a->i[r[i]]; 487 v = a->a + a->i[r[i]]; 488 for (j=0; j<nz; j++) { 489 rtmp[ics[ajtmpold[j]]] = v[j]; 490 } 491 rtmp[ics[r[i]]] += shift_amount; /* shift the diagonal of the matrix */ 492 493 row = *ajtmp++; 494 while (row < i) { 495 pc = rtmp + row; 496 if (*pc != 0.0) { 497 pv = b->a + diag_offset[row]; 498 pj = b->j + diag_offset[row] + 1; 499 multiplier = *pc / *pv++; 500 *pc = multiplier; 501 nz = ai[row+1] - diag_offset[row] - 1; 502 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 503 PetscLogFlops(2*nz); 504 } 505 row = *ajtmp++; 506 } 507 /* finished row so stick it into b->a */ 508 pv = b->a + ai[i] ; 509 pj = b->j + ai[i] ; 510 nz = ai[i+1] - ai[i]; 511 diag = diag_offset[i] - ai[i]; 512 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 513 rs = 0.0; 514 for (j=0; j<nz; j++) { 515 pv[j] = rtmps[pj[j]]; 516 if (j != diag) rs += PetscAbsScalar(pv[j]); 517 } 518 /* shift the diagonals when zero pivot is detected */ 519 #define MAX_NSHIFT 5 520 if (PetscAbsScalar(pv[diag]) <= zeropivot*rs && shift_nonzero){ 521 /* force |diag(*B)| > zeropivot*rs */ 522 if (!nshift){ 523 shift_amount = shift_nonzero; 524 } else { 525 shift_amount *= 2.0; 526 } 527 lushift = PETSC_TRUE; 528 nshift++; 529 break; 530 } else if (PetscRealPart(pv[diag]) <= zeropivot*rs && shift_pd){ 531 /* force *B to be diagonally dominant */ 532 if (nshift>MAX_NSHIFT) { 533 SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner"); 534 } else if (nshift==MAX_NSHIFT) { 535 shift_fraction = shift_hi; 536 lushift = PETSC_FALSE; 537 } else { 538 shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 539 lushift = PETSC_TRUE; 540 } 541 shift_amount = shift_fraction * shift_top; 542 nshift++; 543 break; 544 } else if (PetscAbsScalar(pv[diag]) <= zeropivot*rs){ 545 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 546 } 547 } 548 if (shift_pd && !lushift && shift_fraction>0 && nshift<MAX_NSHIFT) { 549 /* 550 * if no shift in this attempt & shifting & started shifting & can refine, 551 * then try lower shift 552 */ 553 shift_hi = shift_fraction; 554 shift_fraction = (shift_hi+shift_lo)/2.; 555 shift_amount = shift_fraction * shift_top; 556 lushift = PETSC_TRUE; 557 nshift++; 558 } 559 } while (lushift); 560 561 /* invert diagonal entries for simplier triangular solves */ 562 for (i=0; i<n; i++) { 563 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 564 } 565 566 ierr = PetscFree(rtmp);CHKERRQ(ierr); 567 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 568 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 569 C->factor = FACTOR_LU; 570 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 571 C->assembled = PETSC_TRUE; 572 PetscLogFlops(C->n); 573 if (nshift && shift_nonzero) { 574 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nonzero tries %D shift_nonzero value %g\n",nshift,shift_amount); 575 } 576 if (nshift && shift_pd) { 577 b->lu_shift_fraction = shift_fraction; 578 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift); 579 } 580 PetscFunctionReturn(0); 581 } 582 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 585 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 586 { 587 PetscFunctionBegin; 588 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 589 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 590 PetscFunctionReturn(0); 591 } 592 593 594 /* ----------------------------------------------------------- */ 595 #undef __FUNCT__ 596 #define __FUNCT__ "MatLUFactor_SeqAIJ" 597 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 598 { 599 PetscErrorCode ierr; 600 Mat C; 601 602 PetscFunctionBegin; 603 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 604 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 605 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 606 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 607 PetscFunctionReturn(0); 608 } 609 /* ----------------------------------------------------------- */ 610 #undef __FUNCT__ 611 #define __FUNCT__ "MatSolve_SeqAIJ" 612 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 613 { 614 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 615 IS iscol = a->col,isrow = a->row; 616 PetscErrorCode ierr; 617 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 618 PetscInt nz,*rout,*cout; 619 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 620 621 PetscFunctionBegin; 622 if (!n) PetscFunctionReturn(0); 623 624 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 625 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 626 tmp = a->solve_work; 627 628 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 629 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 630 631 /* forward solve the lower triangular */ 632 tmp[0] = b[*r++]; 633 tmps = tmp; 634 for (i=1; i<n; i++) { 635 v = aa + ai[i] ; 636 vi = aj + ai[i] ; 637 nz = a->diag[i] - ai[i]; 638 sum = b[*r++]; 639 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 640 tmp[i] = sum; 641 } 642 643 /* backward solve the upper triangular */ 644 for (i=n-1; i>=0; i--){ 645 v = aa + a->diag[i] + 1; 646 vi = aj + a->diag[i] + 1; 647 nz = ai[i+1] - a->diag[i] - 1; 648 sum = tmp[i]; 649 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 650 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 651 } 652 653 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 654 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 655 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 656 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 657 PetscLogFlops(2*a->nz - A->n); 658 PetscFunctionReturn(0); 659 } 660 661 /* ----------------------------------------------------------- */ 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 664 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 665 { 666 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 667 PetscErrorCode ierr; 668 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 669 PetscScalar *x,*b,*aa = a->a; 670 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 671 PetscInt adiag_i,i,*vi,nz,ai_i; 672 PetscScalar *v,sum; 673 #endif 674 675 PetscFunctionBegin; 676 if (!n) PetscFunctionReturn(0); 677 678 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 679 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 680 681 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 682 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 683 #else 684 /* forward solve the lower triangular */ 685 x[0] = b[0]; 686 for (i=1; i<n; i++) { 687 ai_i = ai[i]; 688 v = aa + ai_i; 689 vi = aj + ai_i; 690 nz = adiag[i] - ai_i; 691 sum = b[i]; 692 while (nz--) sum -= *v++ * x[*vi++]; 693 x[i] = sum; 694 } 695 696 /* backward solve the upper triangular */ 697 for (i=n-1; i>=0; i--){ 698 adiag_i = adiag[i]; 699 v = aa + adiag_i + 1; 700 vi = aj + adiag_i + 1; 701 nz = ai[i+1] - adiag_i - 1; 702 sum = x[i]; 703 while (nz--) sum -= *v++ * x[*vi++]; 704 x[i] = sum*aa[adiag_i]; 705 } 706 #endif 707 PetscLogFlops(2*a->nz - A->n); 708 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 709 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 710 PetscFunctionReturn(0); 711 } 712 713 #undef __FUNCT__ 714 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 715 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 716 { 717 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 718 IS iscol = a->col,isrow = a->row; 719 PetscErrorCode ierr; 720 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 721 PetscInt nz,*rout,*cout; 722 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 723 724 PetscFunctionBegin; 725 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 726 727 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 728 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 729 tmp = a->solve_work; 730 731 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 732 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 733 734 /* forward solve the lower triangular */ 735 tmp[0] = b[*r++]; 736 for (i=1; i<n; i++) { 737 v = aa + ai[i] ; 738 vi = aj + ai[i] ; 739 nz = a->diag[i] - ai[i]; 740 sum = b[*r++]; 741 while (nz--) sum -= *v++ * tmp[*vi++ ]; 742 tmp[i] = sum; 743 } 744 745 /* backward solve the upper triangular */ 746 for (i=n-1; i>=0; i--){ 747 v = aa + a->diag[i] + 1; 748 vi = aj + a->diag[i] + 1; 749 nz = ai[i+1] - a->diag[i] - 1; 750 sum = tmp[i]; 751 while (nz--) sum -= *v++ * tmp[*vi++ ]; 752 tmp[i] = sum*aa[a->diag[i]]; 753 x[*c--] += tmp[i]; 754 } 755 756 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 757 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 758 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 759 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 760 PetscLogFlops(2*a->nz); 761 762 PetscFunctionReturn(0); 763 } 764 /* -------------------------------------------------------------------*/ 765 #undef __FUNCT__ 766 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 767 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 768 { 769 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 770 IS iscol = a->col,isrow = a->row; 771 PetscErrorCode ierr; 772 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 773 PetscInt nz,*rout,*cout,*diag = a->diag; 774 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 775 776 PetscFunctionBegin; 777 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 778 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 779 tmp = a->solve_work; 780 781 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 782 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 783 784 /* copy the b into temp work space according to permutation */ 785 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 786 787 /* forward solve the U^T */ 788 for (i=0; i<n; i++) { 789 v = aa + diag[i] ; 790 vi = aj + diag[i] + 1; 791 nz = ai[i+1] - diag[i] - 1; 792 s1 = tmp[i]; 793 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 794 while (nz--) { 795 tmp[*vi++ ] -= (*v++)*s1; 796 } 797 tmp[i] = s1; 798 } 799 800 /* backward solve the L^T */ 801 for (i=n-1; i>=0; i--){ 802 v = aa + diag[i] - 1 ; 803 vi = aj + diag[i] - 1 ; 804 nz = diag[i] - ai[i]; 805 s1 = tmp[i]; 806 while (nz--) { 807 tmp[*vi-- ] -= (*v--)*s1; 808 } 809 } 810 811 /* copy tmp into x according to permutation */ 812 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 813 814 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 815 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 816 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 817 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 818 819 PetscLogFlops(2*a->nz-A->n); 820 PetscFunctionReturn(0); 821 } 822 823 #undef __FUNCT__ 824 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 825 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 826 { 827 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 828 IS iscol = a->col,isrow = a->row; 829 PetscErrorCode ierr; 830 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 831 PetscInt nz,*rout,*cout,*diag = a->diag; 832 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 833 834 PetscFunctionBegin; 835 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 836 837 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 838 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 839 tmp = a->solve_work; 840 841 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 842 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 843 844 /* copy the b into temp work space according to permutation */ 845 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 846 847 /* forward solve the U^T */ 848 for (i=0; i<n; i++) { 849 v = aa + diag[i] ; 850 vi = aj + diag[i] + 1; 851 nz = ai[i+1] - diag[i] - 1; 852 tmp[i] *= *v++; 853 while (nz--) { 854 tmp[*vi++ ] -= (*v++)*tmp[i]; 855 } 856 } 857 858 /* backward solve the L^T */ 859 for (i=n-1; i>=0; i--){ 860 v = aa + diag[i] - 1 ; 861 vi = aj + diag[i] - 1 ; 862 nz = diag[i] - ai[i]; 863 while (nz--) { 864 tmp[*vi-- ] -= (*v--)*tmp[i]; 865 } 866 } 867 868 /* copy tmp into x according to permutation */ 869 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 870 871 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 872 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 873 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 874 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 875 876 PetscLogFlops(2*a->nz); 877 PetscFunctionReturn(0); 878 } 879 /* ----------------------------------------------------------------*/ 880 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 881 882 #undef __FUNCT__ 883 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 884 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 885 { 886 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 887 IS isicol; 888 PetscErrorCode ierr; 889 PetscInt *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j; 890 PetscInt *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 891 PetscInt *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0; 892 PetscInt incrlev,nnz,i,levels,diagonal_fill; 893 PetscTruth col_identity,row_identity; 894 PetscReal f; 895 896 PetscFunctionBegin; 897 f = info->fill; 898 levels = (PetscInt)info->levels; 899 diagonal_fill = (PetscInt)info->diagonal_fill; 900 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 901 902 /* special case that simply copies fill pattern */ 903 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 904 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 905 if (!levels && row_identity && col_identity) { 906 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 907 (*fact)->factor = FACTOR_LU; 908 b = (Mat_SeqAIJ*)(*fact)->data; 909 if (!b->diag) { 910 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 911 } 912 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 913 b->row = isrow; 914 b->col = iscol; 915 b->icol = isicol; 916 b->lu_damping = info->damping; 917 b->lu_zeropivot = info->zeropivot; 918 b->lu_shift = info->shift; 919 b->lu_shift_fraction= info->shift_fraction; 920 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 921 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 922 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 923 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 924 PetscFunctionReturn(0); 925 } 926 927 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 928 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 929 930 /* get new row pointers */ 931 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 932 ainew[0] = 0; 933 /* don't know how many column pointers are needed so estimate */ 934 jmax = (PetscInt)(f*(ai[n]+1)); 935 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 936 /* ajfill is level of fill for each fill entry */ 937 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr); 938 /* fill is a linked list of nonzeros in active row */ 939 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 940 /* im is level for each filled value */ 941 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 942 /* dloc is location of diagonal in factor */ 943 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr); 944 dloc[0] = 0; 945 for (prow=0; prow<n; prow++) { 946 947 /* copy current row into linked list */ 948 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 949 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 950 xi = aj + ai[r[prow]] ; 951 fill[n] = n; 952 fill[prow] = -1; /* marker to indicate if diagonal exists */ 953 while (nz--) { 954 fm = n; 955 idx = ic[*xi++ ]; 956 do { 957 m = fm; 958 fm = fill[m]; 959 } while (fm < idx); 960 fill[m] = idx; 961 fill[idx] = fm; 962 im[idx] = 0; 963 } 964 965 /* make sure diagonal entry is included */ 966 if (diagonal_fill && fill[prow] == -1) { 967 fm = n; 968 while (fill[fm] < prow) fm = fill[fm]; 969 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 970 fill[fm] = prow; 971 im[prow] = 0; 972 nzf++; 973 dcount++; 974 } 975 976 nzi = 0; 977 row = fill[n]; 978 while (row < prow) { 979 incrlev = im[row] + 1; 980 nz = dloc[row]; 981 xi = ajnew + ainew[row] + nz + 1; 982 flev = ajfill + ainew[row] + nz + 1; 983 nnz = ainew[row+1] - ainew[row] - nz - 1; 984 fm = row; 985 while (nnz-- > 0) { 986 idx = *xi++ ; 987 if (*flev + incrlev > levels) { 988 flev++; 989 continue; 990 } 991 do { 992 m = fm; 993 fm = fill[m]; 994 } while (fm < idx); 995 if (fm != idx) { 996 im[idx] = *flev + incrlev; 997 fill[m] = idx; 998 fill[idx] = fm; 999 fm = idx; 1000 nzf++; 1001 } else { 1002 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 1003 } 1004 flev++; 1005 } 1006 row = fill[row]; 1007 nzi++; 1008 } 1009 /* copy new filled row into permanent storage */ 1010 ainew[prow+1] = ainew[prow] + nzf; 1011 if (ainew[prow+1] > jmax) { 1012 1013 /* estimate how much additional space we will need */ 1014 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1015 /* just double the memory each time */ 1016 /* maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1017 PetscInt maxadd = jmax; 1018 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1019 jmax += maxadd; 1020 1021 /* allocate a longer ajnew and ajfill */ 1022 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1023 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1024 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1025 ajnew = xi; 1026 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1027 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1028 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1029 ajfill = xi; 1030 reallocs++; /* count how many times we realloc */ 1031 } 1032 xi = ajnew + ainew[prow] ; 1033 flev = ajfill + ainew[prow] ; 1034 dloc[prow] = nzi; 1035 fm = fill[n]; 1036 while (nzf--) { 1037 *xi++ = fm ; 1038 *flev++ = im[fm]; 1039 fm = fill[fm]; 1040 } 1041 /* make sure row has diagonal entry */ 1042 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1043 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1044 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1045 } 1046 } 1047 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1048 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1049 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1050 ierr = PetscFree(fill);CHKERRQ(ierr); 1051 ierr = PetscFree(im);CHKERRQ(ierr); 1052 1053 { 1054 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1055 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1056 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1057 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1058 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1059 if (diagonal_fill) { 1060 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1061 } 1062 } 1063 1064 /* put together the new matrix */ 1065 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1066 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1067 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1068 PetscLogObjectParent(*fact,isicol); 1069 b = (Mat_SeqAIJ*)(*fact)->data; 1070 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1071 b->singlemalloc = PETSC_FALSE; 1072 len = (ainew[n] )*sizeof(PetscScalar); 1073 /* the next line frees the default space generated by the Create() */ 1074 ierr = PetscFree(b->a);CHKERRQ(ierr); 1075 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1076 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1077 b->j = ajnew; 1078 b->i = ainew; 1079 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1080 b->diag = dloc; 1081 b->ilen = 0; 1082 b->imax = 0; 1083 b->row = isrow; 1084 b->col = iscol; 1085 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1086 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1087 b->icol = isicol; 1088 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1089 /* In b structure: Free imax, ilen, old a, old j. 1090 Allocate dloc, solve_work, new a, new j */ 1091 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar))); 1092 b->maxnz = b->nz = ainew[n] ; 1093 b->lu_damping = info->damping; 1094 b->lu_shift = info->shift; 1095 b->lu_shift_fraction = info->shift_fraction; 1096 b->lu_zeropivot = info->zeropivot; 1097 (*fact)->factor = FACTOR_LU; 1098 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1099 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1100 1101 (*fact)->info.factor_mallocs = reallocs; 1102 (*fact)->info.fill_ratio_given = f; 1103 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #include "src/mat/impls/sbaij/seq/sbaij.h" 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1110 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *B) 1111 { 1112 Mat C = *B; 1113 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1114 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1115 IS ip=b->row; 1116 PetscErrorCode ierr; 1117 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1118 PetscInt *ai=a->i,*aj=a->j; 1119 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1120 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1121 PetscReal damping=b->factor_damping,zeropivot=b->factor_zeropivot,shift_amount; 1122 PetscTruth damp,chshift; 1123 PetscInt nshift=0,ndamp=0; 1124 1125 PetscFunctionBegin; 1126 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1127 1128 /* initialization */ 1129 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1130 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1131 jl = il + mbs; 1132 rtmp = (MatScalar*)(jl + mbs); 1133 1134 shift_amount = 0; 1135 do { 1136 /* 1137 damp = PETSC_FALSE; 1138 chshift = PETSC_FALSE; 1139 */ 1140 for (i=0; i<mbs; i++) { 1141 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1142 } 1143 1144 for (k = 0; k<mbs; k++){ 1145 bval = ba + bi[k]; 1146 /* initialize k-th row by the perm[k]-th row of A */ 1147 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1148 for (j = jmin; j < jmax; j++){ 1149 col = rip[aj[j]]; 1150 if (col >= k){ /* only take upper triangular entry */ 1151 rtmp[col] = aa[j]; 1152 *bval++ = 0.0; /* for in-place factorization */ 1153 } 1154 } 1155 /* damp the diagonal of the matrix */ 1156 if (ndamp||nshift) rtmp[k] += damping+shift_amount; 1157 1158 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1159 dk = rtmp[k]; 1160 i = jl[k]; /* first row to be added to k_th row */ 1161 1162 while (i < k){ 1163 nexti = jl[i]; /* next row to be added to k_th row */ 1164 1165 /* compute multiplier, update diag(k) and U(i,k) */ 1166 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1167 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1168 dk += uikdi*ba[ili]; 1169 ba[ili] = uikdi; /* -U(i,k) */ 1170 1171 /* add multiple of row i to k-th row */ 1172 jmin = ili + 1; jmax = bi[i+1]; 1173 if (jmin < jmax){ 1174 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1175 /* update il and jl for row i */ 1176 il[i] = jmin; 1177 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1178 } 1179 i = nexti; 1180 } 1181 1182 if (PetscRealPart(dk) < zeropivot && b->factor_shift){ 1183 /* calculate a shift that would make this row diagonally dominant */ 1184 PetscReal rs = PetscAbs(PetscRealPart(dk)); 1185 jmin = bi[k]+1; 1186 nz = bi[k+1] - jmin; 1187 if (nz){ 1188 bcol = bj + jmin; 1189 bval = ba + jmin; 1190 while (nz--){ 1191 rs += PetscAbsScalar(rtmp[*bcol++]); 1192 } 1193 } 1194 /* if this shift is less than the previous, just up the previous 1195 one by a bit */ 1196 shift_amount = PetscMax(rs,1.1*shift_amount); 1197 chshift = PETSC_TRUE; 1198 /* Unlike in the ILU case there is no exit condition on nshift: 1199 we increase the shift until it converges. There is no guarantee that 1200 this algorithm converges faster or slower, or is better or worse 1201 than the ILU algorithm. */ 1202 nshift++; 1203 break; 1204 } 1205 if (PetscRealPart(dk) < zeropivot){ 1206 if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1); 1207 if (damping > 0.0) { 1208 if (ndamp) damping *= 2.0; 1209 damp = PETSC_TRUE; 1210 ndamp++; 1211 break; 1212 } else if (PetscAbsScalar(dk) < zeropivot){ 1213 SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot); 1214 } else { 1215 PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k); 1216 } 1217 } 1218 1219 /* copy data into U(k,:) */ 1220 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1221 jmin = bi[k]+1; jmax = bi[k+1]; 1222 if (jmin < jmax) { 1223 for (j=jmin; j<jmax; j++){ 1224 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1225 } 1226 /* add the k-th row into il and jl */ 1227 il[k] = jmin; 1228 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1229 } 1230 } 1231 } while (damp||chshift); 1232 ierr = PetscFree(il);CHKERRQ(ierr); 1233 1234 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1235 C->factor = FACTOR_CHOLESKY; 1236 C->assembled = PETSC_TRUE; 1237 C->preallocated = PETSC_TRUE; 1238 PetscLogFlops(C->m); 1239 if (ndamp) { 1240 PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqAIJ: number of damping tries %D damping value %g\n",ndamp,damping); 1241 } 1242 if (nshift) { 1243 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ diagonal shifted %D shifts\n",nshift); 1244 } 1245 PetscFunctionReturn(0); 1246 } 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering" 1249 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering(Mat A,Mat *fact) 1250 { 1251 Mat C = *fact; 1252 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1253 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1254 PetscErrorCode ierr; 1255 PetscInt i,j,am = A->m; 1256 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 1257 PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0; 1258 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 1259 PetscReal damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount; 1260 PetscTruth damp,chshift; 1261 PetscInt nshift=0; 1262 1263 PetscFunctionBegin; 1264 /* initialization */ 1265 nz = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar); 1266 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1267 jl = il + am; 1268 rtmp = (MatScalar*)(jl + am); 1269 1270 shift_amount = 0; 1271 do { 1272 damp = PETSC_FALSE; 1273 chshift = PETSC_FALSE; 1274 for (i=0; i<am; i++) { 1275 rtmp[i] = 0.0; jl[i] = am; il[0] = 0; 1276 } 1277 1278 for (k = 0; k<am; k++){ 1279 /* initialize k-th row with elements nonzero in row perm(k) of A */ 1280 nz = ai[k+1] - ai[k]; 1281 acol = aj + ai[k]; 1282 aval = aa + ai[k]; 1283 bval = ba + bi[k]; 1284 while (nz -- ){ 1285 if (*acol < k) { /* skip lower triangular entries */ 1286 acol++; aval++; 1287 } else { 1288 rtmp[*acol++] = *aval++; 1289 *bval++ = 0.0; /* for in-place factorization */ 1290 } 1291 } 1292 /* damp the diagonal of the matrix */ 1293 if (ndamp||nshift) rtmp[k] += damping+shift_amount; 1294 1295 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1296 dk = rtmp[k]; 1297 i = jl[k]; /* first row to be added to k_th row */ 1298 1299 while (i < k){ 1300 nexti = jl[i]; /* next row to be added to k_th row */ 1301 /* compute multiplier, update D(k) and U(i,k) */ 1302 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1303 uikdi = - ba[ili]*ba[bi[i]]; 1304 dk += uikdi*ba[ili]; 1305 ba[ili] = uikdi; /* -U(i,k) */ 1306 1307 /* add multiple of row i to k-th row ... */ 1308 jmin = ili + 1; 1309 nz = bi[i+1] - jmin; 1310 if (nz > 0){ 1311 bcol = bj + jmin; 1312 bval = ba + jmin; 1313 while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 1314 /* update il and jl for i-th row */ 1315 il[i] = jmin; 1316 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1317 } 1318 i = nexti; 1319 } 1320 1321 if (PetscRealPart(dk) < zeropivot && b->factor_shift){ 1322 /* calculate a shift that would make this row diagonally dominant */ 1323 PetscReal rs = PetscAbs(PetscRealPart(dk)); 1324 jmin = bi[k]+1; 1325 nz = bi[k+1] - jmin; 1326 if (nz){ 1327 bcol = bj + jmin; 1328 bval = ba + jmin; 1329 while (nz--){ 1330 rs += PetscAbsScalar(rtmp[*bcol++]); 1331 } 1332 } 1333 /* if this shift is less than the previous, just up the previous 1334 one by a bit */ 1335 shift_amount = PetscMax(rs,1.1*shift_amount); 1336 chshift = PETSC_TRUE; 1337 /* Unlike in the ILU case there is no exit condition on nshift: 1338 we increase the shift until it converges. There is no guarantee that 1339 this algorithm converges faster or slower, or is better or worse 1340 than the ILU algorithm. */ 1341 nshift++; 1342 break; 1343 } 1344 if (PetscRealPart(dk) < zeropivot){ 1345 if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1); 1346 if (damping > 0.0) { 1347 if (ndamp) damping *= 2.0; 1348 damp = PETSC_TRUE; 1349 ndamp++; 1350 break; 1351 } else if (PetscAbsScalar(dk) < zeropivot){ 1352 SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot); 1353 } else { 1354 PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k); 1355 } 1356 } 1357 1358 /* copy data into U(k,:) */ 1359 ba[bi[k]] = 1.0/dk; 1360 jmin = bi[k]+1; 1361 nz = bi[k+1] - jmin; 1362 if (nz){ 1363 bcol = bj + jmin; 1364 bval = ba + jmin; 1365 while (nz--){ 1366 *bval++ = rtmp[*bcol]; 1367 rtmp[*bcol++] = 0.0; 1368 } 1369 /* add k-th row into il and jl */ 1370 il[k] = jmin; 1371 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1372 } 1373 } 1374 } while (damp||chshift); 1375 ierr = PetscFree(il);CHKERRQ(ierr); 1376 1377 C->factor = FACTOR_CHOLESKY; 1378 C->assembled = PETSC_TRUE; 1379 C->preallocated = PETSC_TRUE; 1380 PetscLogFlops(C->m); 1381 if (ndamp) { 1382 PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqAIJ_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping); 1383 } 1384 if (nshift) { 1385 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering diagonal shifted %D shifts\n",nshift); 1386 } 1387 PetscFunctionReturn(0); 1388 } 1389 1390 #include "petscbt.h" 1391 #include "src/mat/utils/freespace.h" 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1394 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1395 { 1396 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1397 Mat_SeqSBAIJ *b; 1398 Mat B; 1399 PetscErrorCode ierr; 1400 PetscTruth perm_identity; 1401 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1402 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1403 PetscInt nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1404 PetscReal fill=info->fill,levels=info->levels; 1405 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1406 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1407 PetscBT lnkbt; 1408 1409 PetscFunctionBegin; 1410 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1411 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1412 1413 /* special case that simply copies fill pattern */ 1414 if (!levels && perm_identity) { 1415 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1416 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1417 for (i=0; i<am; i++) { 1418 ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */ 1419 } 1420 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1421 B = *fact; 1422 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1423 ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr); 1424 1425 b = (Mat_SeqSBAIJ*)B->data; 1426 uj = b->j; 1427 for (i=0; i<am; i++) { 1428 aj = a->j + a->diag[i]; 1429 for (j=0; j<ui[i]; j++){ 1430 *uj++ = *aj++; 1431 } 1432 b->ilen[i] = ui[i]; 1433 } 1434 ierr = PetscFree(ui);CHKERRQ(ierr); 1435 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1436 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1437 1438 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1439 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1440 ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 1441 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering; 1442 PetscFunctionReturn(0); 1443 } 1444 1445 /* initialization */ 1446 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1447 ui[0] = 0; 1448 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 1449 1450 /* jl: linked list for storing indices of the pivot rows 1451 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1452 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1453 il = jl + am; 1454 uj_ptr = (PetscInt**)(il + am); 1455 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1456 for (i=0; i<am; i++){ 1457 jl[i] = am; il[i] = 0; 1458 } 1459 1460 /* create and initialize a linked list for storing column indices of the active row k */ 1461 nlnk = am + 1; 1462 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1463 1464 /* initial FreeSpace size is fill*(ai[am]+1) */ 1465 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1466 current_space = free_space; 1467 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1468 current_space_lvl = free_space_lvl; 1469 1470 for (k=0; k<am; k++){ /* for each active row k */ 1471 /* initialize lnk by the column indices of row rip[k] of A */ 1472 nzk = 0; 1473 ncols = ai[rip[k]+1] - ai[rip[k]]; 1474 ncols_upper = 0; 1475 cols = cols_lvl + am; 1476 for (j=0; j<ncols; j++){ 1477 i = rip[*(aj + ai[rip[k]] + j)]; 1478 if (i >= k){ /* only take upper triangular entry */ 1479 cols[ncols_upper] = i; 1480 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 1481 ncols_upper++; 1482 } 1483 } 1484 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1485 nzk += nlnk; 1486 1487 /* update lnk by computing fill-in for each pivot row to be merged in */ 1488 prow = jl[k]; /* 1st pivot row */ 1489 1490 while (prow < k){ 1491 nextprow = jl[prow]; 1492 1493 /* merge prow into k-th row */ 1494 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1495 jmax = ui[prow+1]; 1496 ncols = jmax-jmin; 1497 i = jmin - ui[prow]; 1498 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1499 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1500 ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1501 nzk += nlnk; 1502 1503 /* update il and jl for prow */ 1504 if (jmin < jmax){ 1505 il[prow] = jmin; 1506 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1507 } 1508 prow = nextprow; 1509 } 1510 1511 /* if free space is not available, make more free space */ 1512 if (current_space->local_remaining<nzk) { 1513 i = am - k + 1; /* num of unfactored rows */ 1514 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1515 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1516 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1517 reallocs++; 1518 } 1519 1520 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1521 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1522 1523 /* add the k-th row into il and jl */ 1524 if (nzk-1 > 0){ 1525 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1526 jl[k] = jl[i]; jl[i] = k; 1527 il[k] = ui[k] + 1; 1528 } 1529 uj_ptr[k] = current_space->array; 1530 uj_lvl_ptr[k] = current_space_lvl->array; 1531 1532 current_space->array += nzk; 1533 current_space->local_used += nzk; 1534 current_space->local_remaining -= nzk; 1535 1536 current_space_lvl->array += nzk; 1537 current_space_lvl->local_used += nzk; 1538 current_space_lvl->local_remaining -= nzk; 1539 1540 ui[k+1] = ui[k] + nzk; 1541 } 1542 1543 if (ai[am] != 0) { 1544 PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]); 1545 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1546 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1547 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1548 } else { 1549 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1550 } 1551 1552 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1553 ierr = PetscFree(jl);CHKERRQ(ierr); 1554 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1555 1556 /* destroy list of free space and other temporary array(s) */ 1557 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1558 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1559 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1560 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1561 1562 /* put together the new matrix in MATSEQSBAIJ format */ 1563 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1564 B = *fact; 1565 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1566 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1567 1568 b = (Mat_SeqSBAIJ*)B->data; 1569 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1570 b->singlemalloc = PETSC_FALSE; 1571 /* the next line frees the default space generated by the Create() */ 1572 ierr = PetscFree(b->a);CHKERRQ(ierr); 1573 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1574 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1575 b->j = uj; 1576 b->i = ui; 1577 b->diag = 0; 1578 b->ilen = 0; 1579 b->imax = 0; 1580 b->row = perm; 1581 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1582 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1583 b->icol = perm; 1584 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1585 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1586 PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 1587 b->maxnz = b->nz = ui[am]; 1588 1589 B->factor = FACTOR_CHOLESKY; 1590 B->info.factor_mallocs = reallocs; 1591 B->info.fill_ratio_given = fill; 1592 if (ai[am] != 0) { 1593 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1594 } else { 1595 B->info.fill_ratio_needed = 0.0; 1596 } 1597 if (perm_identity){ 1598 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1599 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 1601 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering; 1602 } else { 1603 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1604 } 1605 PetscFunctionReturn(0); 1606 } 1607 1608 #undef __FUNCT__ 1609 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1610 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1611 { 1612 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1613 Mat_SeqSBAIJ *b; 1614 Mat B; 1615 PetscErrorCode ierr; 1616 PetscTruth perm_identity; 1617 PetscReal fill = info->fill; 1618 PetscInt *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1619 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1620 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1621 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1622 PetscBT lnkbt; 1623 IS iperm; 1624 1625 PetscFunctionBegin; 1626 /* check whether perm is the identity mapping */ 1627 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1628 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1629 1630 if (!perm_identity){ 1631 /* check if perm is symmetric! */ 1632 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1633 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1634 for (i=0; i<mbs; i++) { 1635 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1636 } 1637 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1638 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1639 } 1640 1641 /* initialization */ 1642 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1643 ui[0] = 0; 1644 1645 /* jl: linked list for storing indices of the pivot rows 1646 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 1647 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1648 il = jl + mbs; 1649 cols = il + mbs; 1650 ui_ptr = (PetscInt**)(cols + mbs); 1651 for (i=0; i<mbs; i++){ 1652 jl[i] = mbs; il[i] = 0; 1653 } 1654 1655 /* create and initialize a linked list for storing column indices of the active row k */ 1656 nlnk = mbs + 1; 1657 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1658 1659 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 1660 ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 1661 current_space = free_space; 1662 1663 for (k=0; k<mbs; k++){ /* for each active row k */ 1664 /* initialize lnk by the column indices of row rip[k] of A */ 1665 nzk = 0; 1666 ncols = ai[rip[k]+1] - ai[rip[k]]; 1667 ncols_upper = 0; 1668 for (j=0; j<ncols; j++){ 1669 i = rip[*(aj + ai[rip[k]] + j)]; 1670 if (i >= k){ /* only take upper triangular entry */ 1671 cols[ncols_upper] = i; 1672 ncols_upper++; 1673 } 1674 } 1675 ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1676 nzk += nlnk; 1677 1678 /* update lnk by computing fill-in for each pivot row to be merged in */ 1679 prow = jl[k]; /* 1st pivot row */ 1680 1681 while (prow < k){ 1682 nextprow = jl[prow]; 1683 /* merge prow into k-th row */ 1684 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 1685 jmax = ui[prow+1]; 1686 ncols = jmax-jmin; 1687 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 1688 ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1689 nzk += nlnk; 1690 1691 /* update il and jl for prow */ 1692 if (jmin < jmax){ 1693 il[prow] = jmin; 1694 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1695 } 1696 prow = nextprow; 1697 } 1698 1699 /* if free space is not available, make more free space */ 1700 if (current_space->local_remaining<nzk) { 1701 i = mbs - k + 1; /* num of unfactored rows */ 1702 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1703 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1704 reallocs++; 1705 } 1706 1707 /* copy data into free space, then initialize lnk */ 1708 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1709 1710 /* add the k-th row into il and jl */ 1711 if (nzk-1 > 0){ 1712 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1713 jl[k] = jl[i]; jl[i] = k; 1714 il[k] = ui[k] + 1; 1715 } 1716 ui_ptr[k] = current_space->array; 1717 current_space->array += nzk; 1718 current_space->local_used += nzk; 1719 current_space->local_remaining -= nzk; 1720 1721 ui[k+1] = ui[k] + nzk; 1722 } 1723 1724 if (ai[mbs] != 0) { 1725 PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]); 1726 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1727 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1728 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1729 } else { 1730 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1731 } 1732 1733 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1734 ierr = PetscFree(jl);CHKERRQ(ierr); 1735 1736 /* destroy list of free space and other temporary array(s) */ 1737 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1738 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1739 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1740 1741 /* put together the new matrix in MATSEQSBAIJ format */ 1742 ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr); 1743 B = *fact; 1744 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1745 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1746 1747 b = (Mat_SeqSBAIJ*)B->data; 1748 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1749 b->singlemalloc = PETSC_FALSE; 1750 /* the next line frees the default space generated by the Create() */ 1751 ierr = PetscFree(b->a);CHKERRQ(ierr); 1752 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1753 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1754 b->j = uj; 1755 b->i = ui; 1756 b->diag = 0; 1757 b->ilen = 0; 1758 b->imax = 0; 1759 b->row = perm; 1760 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1761 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1762 b->icol = perm; 1763 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1764 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1765 PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 1766 b->maxnz = b->nz = ui[mbs]; 1767 1768 B->factor = FACTOR_CHOLESKY; 1769 B->info.factor_mallocs = reallocs; 1770 B->info.fill_ratio_given = fill; 1771 if (ai[mbs] != 0) { 1772 B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 1773 } else { 1774 B->info.fill_ratio_needed = 0.0; 1775 } 1776 if (perm_identity){ 1777 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1778 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1779 ierr = PetscObjectComposeFunction((PetscObject)(*fact),"MatCholeskyFactorNumeric_NaturalOrdering","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);CHKERRQ(ierr); 1780 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering; 1781 } else { 1782 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1783 } 1784 PetscFunctionReturn(0); 1785 } 1786