1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 9 { 10 PetscFunctionBegin; 11 12 SETERRQ(PETSC_ERR_SUP,"Code not written"); 13 #if !defined(PETSC_USE_DEBUG) 14 PetscFunctionReturn(0); 15 #endif 16 } 17 18 19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 21 22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 28 /* ------------------------------------------------------------ 29 30 This interface was contribed by Tony Caola 31 32 This routine is an interface to the pivoting drop-tolerance 33 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 34 SPARSEKIT2. 35 36 The SPARSEKIT2 routines used here are covered by the GNU 37 copyright; see the file gnu in this directory. 38 39 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 40 help in getting this routine ironed out. 41 42 The major drawback to this routine is that if info->fill is 43 not large enough it fails rather than allocating more space; 44 this can be fixed by hacking/improving the f2c version of 45 Yousef Saad's code. 46 47 ------------------------------------------------------------ 48 */ 49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 50 { 51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 52 PetscFunctionBegin; 53 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 54 You can obtain the drop tolerance routines by installing PETSc from\n\ 55 www.mcs.anl.gov/petsc\n"); 56 #else 57 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 58 IS iscolf,isicol,isirow; 59 PetscTruth reorder; 60 PetscErrorCode ierr,sierr; 61 PetscInt *c,*r,*ic,i,n = A->m; 62 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63 PetscInt *ordcol,*iwk,*iperm,*jw; 64 PetscInt jmax,lfill,job,*o_i,*o_j; 65 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66 PetscReal af; 67 68 PetscFunctionBegin; 69 70 if (info->dt == PETSC_DEFAULT) info->dt = .005; 71 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 72 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 73 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 74 lfill = (PetscInt)(info->dtcount/2.0); 75 jmax = (PetscInt)(info->fill*a->nz); 76 77 78 /* ------------------------------------------------------------ 79 If reorder=.TRUE., then the original matrix has to be 80 reordered to reflect the user selected ordering scheme, and 81 then de-reordered so it is in it's original format. 82 Because Saad's dperm() is NOT in place, we have to copy 83 the original matrix and allocate more storage. . . 84 ------------------------------------------------------------ 85 */ 86 87 /* set reorder to true if either isrow or iscol is not identity */ 88 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 89 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 90 reorder = PetscNot(reorder); 91 92 93 /* storage for ilu factor */ 94 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 95 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 97 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 for (i=old_i[0];i<old_i[n];i++) { 104 old_j[i]++; 105 } 106 for(i=0;i<n+1;i++) { 107 old_i[i]++; 108 }; 109 110 111 if (reorder) { 112 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 113 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 114 for(i=0;i<n;i++) { 115 r[i] = r[i]+1; 116 c[i] = c[i]+1; 117 } 118 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 119 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 121 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 122 for (i=0;i<n;i++) { 123 r[i] = r[i]-1; 124 c[i] = c[i]-1; 125 } 126 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 127 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 128 o_a = old_a2; 129 o_j = old_j2; 130 o_i = old_i2; 131 } else { 132 o_a = old_a; 133 o_j = old_j; 134 o_i = old_i; 135 } 136 137 /* ------------------------------------------------------------ 138 Call Saad's ilutp() routine to generate the factorization 139 ------------------------------------------------------------ 140 */ 141 142 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 144 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 145 146 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 147 if (sierr) { 148 switch (sierr) { 149 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 150 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 152 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 153 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 154 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 155 } 156 } 157 158 ierr = PetscFree(w);CHKERRQ(ierr); 159 ierr = PetscFree(jw);CHKERRQ(ierr); 160 161 /* ------------------------------------------------------------ 162 Saad's routine gives the result in Modified Sparse Row (msr) 163 Convert to Compressed Sparse Row format (csr) 164 ------------------------------------------------------------ 165 */ 166 167 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 168 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 169 170 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 171 172 ierr = PetscFree(iwk);CHKERRQ(ierr); 173 ierr = PetscFree(wk);CHKERRQ(ierr); 174 175 if (reorder) { 176 ierr = PetscFree(old_a2);CHKERRQ(ierr); 177 ierr = PetscFree(old_j2);CHKERRQ(ierr); 178 ierr = PetscFree(old_i2);CHKERRQ(ierr); 179 } else { 180 /* fix permutation of old_j that the factorization introduced */ 181 for (i=old_i[0]; i<old_i[n]; i++) { 182 old_j[i-1] = iperm[old_j[i-1]-1]; 183 } 184 } 185 186 /* get rid of the shift to indices starting at 1 */ 187 for (i=0; i<n+1; i++) { 188 old_i[i]--; 189 } 190 for (i=old_i[0];i<old_i[n];i++) { 191 old_j[i]--; 192 } 193 194 /* Make the factored matrix 0-based */ 195 for (i=0; i<n+1; i++) { 196 new_i[i]--; 197 } 198 for (i=new_i[0];i<new_i[n];i++) { 199 new_j[i]--; 200 } 201 202 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 203 /*-- permute the right-hand-side and solution vectors --*/ 204 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 205 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 206 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 207 for(i=0; i<n; i++) { 208 ordcol[i] = ic[iperm[i]-1]; 209 }; 210 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 211 ierr = ISDestroy(isicol);CHKERRQ(ierr); 212 213 ierr = PetscFree(iperm);CHKERRQ(ierr); 214 215 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 216 ierr = PetscFree(ordcol);CHKERRQ(ierr); 217 218 /*----- put together the new matrix -----*/ 219 220 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 221 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 222 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 223 (*fact)->factor = FACTOR_LU; 224 (*fact)->assembled = PETSC_TRUE; 225 226 b = (Mat_SeqAIJ*)(*fact)->data; 227 ierr = PetscFree(b->imax);CHKERRQ(ierr); 228 b->sorted = PETSC_FALSE; 229 b->singlemalloc = PETSC_FALSE; 230 /* the next line frees the default space generated by the MatCreate() */ 231 ierr = PetscFree(b->a);CHKERRQ(ierr); 232 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 247 248 /* check out for identical nodes. If found, use inode functions */ 249 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 250 251 af = ((double)b->nz)/((double)a->nz) + .001; 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 /* 262 Factorization code for AIJ format. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 267 { 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 269 IS isicol; 270 PetscErrorCode ierr; 271 PetscInt *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j; 272 PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 273 PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im; 274 PetscReal f; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 284 ainew[0] = 0; 285 /* don't know how many column pointers are needed so estimate */ 286 f = info->fill; 287 jmax = (PetscInt)(f*ai[n]+1); 288 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 289 /* fill is a linked list of nonzeros in active row */ 290 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 291 im = fill + n + 1; 292 /* idnew is location of diagonal in factor */ 293 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr); 294 idnew[0] = 0; 295 296 for (i=0; i<n; i++) { 297 /* first copy previous fill into linked list */ 298 nnz = nz = ai[r[i]+1] - ai[r[i]]; 299 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 300 ajtmp = aj + ai[r[i]]; 301 fill[n] = n; 302 while (nz--) { 303 fm = n; 304 idx = ic[*ajtmp++]; 305 do { 306 m = fm; 307 fm = fill[m]; 308 } while (fm < idx); 309 fill[m] = idx; 310 fill[idx] = fm; 311 } 312 row = fill[n]; 313 while (row < i) { 314 ajtmp = ajnew + idnew[row] + 1; 315 nzbd = 1 + idnew[row] - ainew[row]; 316 nz = im[row] - nzbd; 317 fm = row; 318 while (nz-- > 0) { 319 idx = *ajtmp++ ; 320 nzbd++; 321 if (idx == i) im[row] = nzbd; 322 do { 323 m = fm; 324 fm = fill[m]; 325 } while (fm < idx); 326 if (fm != idx) { 327 fill[m] = idx; 328 fill[idx] = fm; 329 fm = idx; 330 nnz++; 331 } 332 } 333 row = fill[row]; 334 } 335 /* copy new filled row into permanent storage */ 336 ainew[i+1] = ainew[i] + nnz; 337 if (ainew[i+1] > jmax) { 338 339 /* estimate how much additional space we will need */ 340 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 341 /* just double the memory each time */ 342 PetscInt maxadd = jmax; 343 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 344 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 345 jmax += maxadd; 346 347 /* allocate a longer ajnew */ 348 ierr = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 349 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr); 350 ierr = PetscFree(ajnew);CHKERRQ(ierr); 351 ajnew = ajtmp; 352 reallocs++; /* count how many times we realloc */ 353 } 354 ajtmp = ajnew + ainew[i]; 355 fm = fill[n]; 356 nzi = 0; 357 im[i] = nnz; 358 while (nnz--) { 359 if (fm < i) nzi++; 360 *ajtmp++ = fm ; 361 fm = fill[fm]; 362 } 363 idnew[i] = ainew[i] + nzi; 364 } 365 if (ai[n] != 0) { 366 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 370 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 371 } else { 372 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 373 } 374 375 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 376 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 377 378 ierr = PetscFree(fill);CHKERRQ(ierr); 379 380 /* put together the new matrix */ 381 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 382 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 383 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 384 PetscLogObjectParent(*B,isicol); 385 b = (Mat_SeqAIJ*)(*B)->data; 386 ierr = PetscFree(b->imax);CHKERRQ(ierr); 387 b->singlemalloc = PETSC_FALSE; 388 /* the next line frees the default space generated by the Create() */ 389 ierr = PetscFree(b->a);CHKERRQ(ierr); 390 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 391 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 392 b->j = ajnew; 393 b->i = ainew; 394 b->diag = idnew; 395 b->ilen = 0; 396 b->imax = 0; 397 b->row = isrow; 398 b->col = iscol; 399 b->lu_damping = info->damping; 400 b->lu_zeropivot = info->zeropivot; 401 b->lu_shift = info->shift; 402 b->lu_shift_fraction = info->shift_fraction; 403 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 404 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 405 b->icol = isicol; 406 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 407 /* In b structure: Free imax, ilen, old a, old j. 408 Allocate idnew, solve_work, new a, new j */ 409 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar))); 410 b->maxnz = b->nz = ainew[n] ; 411 412 (*B)->factor = FACTOR_LU; 413 (*B)->info.factor_mallocs = reallocs; 414 (*B)->info.fill_ratio_given = f; 415 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 416 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 417 418 if (ai[n] != 0) { 419 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 420 } else { 421 (*B)->info.fill_ratio_needed = 0.0; 422 } 423 PetscFunctionReturn(0); 424 } 425 /* ----------------------------------------------------------- */ 426 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 430 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 431 { 432 Mat C = *B; 433 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 434 IS isrow = b->row,isicol = b->icol; 435 PetscErrorCode ierr; 436 PetscInt *r,*ic,i,j,n = A->m,*ai = b->i,*aj = b->j; 437 PetscInt *ajtmpold,*ajtmp,nz,row,*ics; 438 PetscInt *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0; 439 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 440 PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d; 441 PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.; 442 PetscTruth damp,lushift; 443 444 PetscFunctionBegin; 445 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 446 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 447 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 448 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 449 rtmps = rtmp; ics = ic; 450 451 if (!a->diag) { 452 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 453 } 454 455 if (b->lu_shift) { /* set max shift */ 456 PetscInt *aai = a->i,*ddiag = a->diag; 457 shift_top = 0; 458 for (i=0; i<n; i++) { 459 d = PetscAbsScalar((a->a)[ddiag[i]]); 460 /* calculate amt of shift needed for this row */ 461 if (d<=0) { 462 row_shift = 0; 463 } else { 464 row_shift = -2*d; 465 } 466 v = a->a+aai[i]; 467 for (j=0; j<aai[i+1]-aai[i]; j++) 468 row_shift += PetscAbsScalar(v[j]); 469 if (row_shift>shift_top) shift_top = row_shift; 470 } 471 } 472 473 shift_fraction = 0; shift_amount = 0; 474 do { 475 damp = PETSC_FALSE; 476 lushift = PETSC_FALSE; 477 for (i=0; i<n; i++) { 478 nz = ai[i+1] - ai[i]; 479 ajtmp = aj + ai[i]; 480 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 481 482 /* load in initial (unfactored row) */ 483 nz = a->i[r[i]+1] - a->i[r[i]]; 484 ajtmpold = a->j + a->i[r[i]]; 485 v = a->a + a->i[r[i]]; 486 for (j=0; j<nz; j++) { 487 rtmp[ics[ajtmpold[j]]] = v[j]; 488 } 489 rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */ 490 491 row = *ajtmp++ ; 492 while (row < i) { 493 pc = rtmp + row; 494 if (*pc != 0.0) { 495 pv = b->a + diag_offset[row]; 496 pj = b->j + diag_offset[row] + 1; 497 multiplier = *pc / *pv++; 498 *pc = multiplier; 499 nz = ai[row+1] - diag_offset[row] - 1; 500 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 501 PetscLogFlops(2*nz); 502 } 503 row = *ajtmp++; 504 } 505 /* finished row so stick it into b->a */ 506 pv = b->a + ai[i] ; 507 pj = b->j + ai[i] ; 508 nz = ai[i+1] - ai[i]; 509 diag = diag_offset[i] - ai[i]; 510 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 511 rs = 0.0; 512 for (j=0; j<nz; j++) { 513 pv[j] = rtmps[pj[j]]; 514 if (j != diag) rs += PetscAbsScalar(pv[j]); 515 } 516 #define MAX_NSHIFT 5 517 if (PetscRealPart(pv[diag]) <= zeropivot*rs && b->lu_shift) { 518 if (nshift>MAX_NSHIFT) { 519 SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner"); 520 } else if (nshift==MAX_NSHIFT) { 521 shift_fraction = shift_hi; 522 lushift = PETSC_FALSE; 523 } else { 524 shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 525 lushift = PETSC_TRUE; 526 } 527 shift_amount = shift_fraction * shift_top; 528 nshift++; 529 break; 530 } 531 if (PetscAbsScalar(pv[diag]) <= zeropivot*rs) { 532 if (damping) { 533 if (ndamp) damping *= 2.0; 534 damp = PETSC_TRUE; 535 ndamp++; 536 break; 537 } else { 538 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 539 } 540 } 541 } 542 if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) { 543 /* 544 * if no shift in this attempt & shifting & started shifting & can refine, 545 * then try lower shift 546 */ 547 shift_hi = shift_fraction; 548 shift_fraction = (shift_hi+shift_lo)/2.; 549 shift_amount = shift_fraction * shift_top; 550 lushift = PETSC_TRUE; 551 nshift++; 552 } 553 } while (damp || lushift); 554 555 /* invert diagonal entries for simplier triangular solves */ 556 for (i=0; i<n; i++) { 557 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 558 } 559 560 ierr = PetscFree(rtmp);CHKERRQ(ierr); 561 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 562 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 563 C->factor = FACTOR_LU; 564 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 565 C->assembled = PETSC_TRUE; 566 PetscLogFlops(C->n); 567 if (ndamp) { 568 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %D damping value %g\n",ndamp,damping); 569 } 570 if (nshift) { 571 b->lu_shift_fraction = shift_fraction; 572 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift); 573 } 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 579 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 580 { 581 PetscFunctionBegin; 582 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 583 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 584 PetscFunctionReturn(0); 585 } 586 587 588 /* ----------------------------------------------------------- */ 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatLUFactor_SeqAIJ" 591 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 592 { 593 PetscErrorCode ierr; 594 Mat C; 595 596 PetscFunctionBegin; 597 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 598 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 599 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 600 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 601 PetscFunctionReturn(0); 602 } 603 /* ----------------------------------------------------------- */ 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatSolve_SeqAIJ" 606 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 607 { 608 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 609 IS iscol = a->col,isrow = a->row; 610 PetscErrorCode ierr; 611 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 612 PetscInt nz,*rout,*cout; 613 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 614 615 PetscFunctionBegin; 616 if (!n) PetscFunctionReturn(0); 617 618 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 619 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 620 tmp = a->solve_work; 621 622 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 623 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 624 625 /* forward solve the lower triangular */ 626 tmp[0] = b[*r++]; 627 tmps = tmp; 628 for (i=1; i<n; i++) { 629 v = aa + ai[i] ; 630 vi = aj + ai[i] ; 631 nz = a->diag[i] - ai[i]; 632 sum = b[*r++]; 633 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 634 tmp[i] = sum; 635 } 636 637 /* backward solve the upper triangular */ 638 for (i=n-1; i>=0; i--){ 639 v = aa + a->diag[i] + 1; 640 vi = aj + a->diag[i] + 1; 641 nz = ai[i+1] - a->diag[i] - 1; 642 sum = tmp[i]; 643 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 644 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 645 } 646 647 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 648 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 649 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 650 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 651 PetscLogFlops(2*a->nz - A->n); 652 PetscFunctionReturn(0); 653 } 654 655 /* ----------------------------------------------------------- */ 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 658 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 659 { 660 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 661 PetscErrorCode ierr; 662 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 663 PetscScalar *x,*b,*aa = a->a; 664 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 665 PetscInt adiag_i,i,*vi,nz,ai_i; 666 PetscScalar *v,sum; 667 #endif 668 669 PetscFunctionBegin; 670 if (!n) PetscFunctionReturn(0); 671 672 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 673 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 674 675 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 676 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 677 #else 678 /* forward solve the lower triangular */ 679 x[0] = b[0]; 680 for (i=1; i<n; i++) { 681 ai_i = ai[i]; 682 v = aa + ai_i; 683 vi = aj + ai_i; 684 nz = adiag[i] - ai_i; 685 sum = b[i]; 686 while (nz--) sum -= *v++ * x[*vi++]; 687 x[i] = sum; 688 } 689 690 /* backward solve the upper triangular */ 691 for (i=n-1; i>=0; i--){ 692 adiag_i = adiag[i]; 693 v = aa + adiag_i + 1; 694 vi = aj + adiag_i + 1; 695 nz = ai[i+1] - adiag_i - 1; 696 sum = x[i]; 697 while (nz--) sum -= *v++ * x[*vi++]; 698 x[i] = sum*aa[adiag_i]; 699 } 700 #endif 701 PetscLogFlops(2*a->nz - A->n); 702 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 703 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 709 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 710 { 711 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 712 IS iscol = a->col,isrow = a->row; 713 PetscErrorCode ierr; 714 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 715 PetscInt nz,*rout,*cout; 716 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 717 718 PetscFunctionBegin; 719 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 720 721 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 722 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 723 tmp = a->solve_work; 724 725 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 726 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 727 728 /* forward solve the lower triangular */ 729 tmp[0] = b[*r++]; 730 for (i=1; i<n; i++) { 731 v = aa + ai[i] ; 732 vi = aj + ai[i] ; 733 nz = a->diag[i] - ai[i]; 734 sum = b[*r++]; 735 while (nz--) sum -= *v++ * tmp[*vi++ ]; 736 tmp[i] = sum; 737 } 738 739 /* backward solve the upper triangular */ 740 for (i=n-1; i>=0; i--){ 741 v = aa + a->diag[i] + 1; 742 vi = aj + a->diag[i] + 1; 743 nz = ai[i+1] - a->diag[i] - 1; 744 sum = tmp[i]; 745 while (nz--) sum -= *v++ * tmp[*vi++ ]; 746 tmp[i] = sum*aa[a->diag[i]]; 747 x[*c--] += tmp[i]; 748 } 749 750 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 751 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 752 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 753 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 754 PetscLogFlops(2*a->nz); 755 756 PetscFunctionReturn(0); 757 } 758 /* -------------------------------------------------------------------*/ 759 #undef __FUNCT__ 760 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 761 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 762 { 763 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 764 IS iscol = a->col,isrow = a->row; 765 PetscErrorCode ierr; 766 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 767 PetscInt nz,*rout,*cout,*diag = a->diag; 768 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 769 770 PetscFunctionBegin; 771 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 772 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 773 tmp = a->solve_work; 774 775 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 776 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 777 778 /* copy the b into temp work space according to permutation */ 779 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 780 781 /* forward solve the U^T */ 782 for (i=0; i<n; i++) { 783 v = aa + diag[i] ; 784 vi = aj + diag[i] + 1; 785 nz = ai[i+1] - diag[i] - 1; 786 s1 = tmp[i]; 787 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 788 while (nz--) { 789 tmp[*vi++ ] -= (*v++)*s1; 790 } 791 tmp[i] = s1; 792 } 793 794 /* backward solve the L^T */ 795 for (i=n-1; i>=0; i--){ 796 v = aa + diag[i] - 1 ; 797 vi = aj + diag[i] - 1 ; 798 nz = diag[i] - ai[i]; 799 s1 = tmp[i]; 800 while (nz--) { 801 tmp[*vi-- ] -= (*v--)*s1; 802 } 803 } 804 805 /* copy tmp into x according to permutation */ 806 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 807 808 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 809 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 810 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 811 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 812 813 PetscLogFlops(2*a->nz-A->n); 814 PetscFunctionReturn(0); 815 } 816 817 #undef __FUNCT__ 818 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 819 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 820 { 821 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 822 IS iscol = a->col,isrow = a->row; 823 PetscErrorCode ierr; 824 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 825 PetscInt nz,*rout,*cout,*diag = a->diag; 826 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 827 828 PetscFunctionBegin; 829 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 830 831 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 832 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 833 tmp = a->solve_work; 834 835 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 836 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 837 838 /* copy the b into temp work space according to permutation */ 839 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 840 841 /* forward solve the U^T */ 842 for (i=0; i<n; i++) { 843 v = aa + diag[i] ; 844 vi = aj + diag[i] + 1; 845 nz = ai[i+1] - diag[i] - 1; 846 tmp[i] *= *v++; 847 while (nz--) { 848 tmp[*vi++ ] -= (*v++)*tmp[i]; 849 } 850 } 851 852 /* backward solve the L^T */ 853 for (i=n-1; i>=0; i--){ 854 v = aa + diag[i] - 1 ; 855 vi = aj + diag[i] - 1 ; 856 nz = diag[i] - ai[i]; 857 while (nz--) { 858 tmp[*vi-- ] -= (*v--)*tmp[i]; 859 } 860 } 861 862 /* copy tmp into x according to permutation */ 863 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 864 865 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 866 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 867 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 868 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 869 870 PetscLogFlops(2*a->nz); 871 PetscFunctionReturn(0); 872 } 873 /* ----------------------------------------------------------------*/ 874 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 875 876 #undef __FUNCT__ 877 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 878 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 879 { 880 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 881 IS isicol; 882 PetscErrorCode ierr; 883 PetscInt *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j; 884 PetscInt *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 885 PetscInt *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0; 886 PetscInt incrlev,nnz,i,levels,diagonal_fill; 887 PetscTruth col_identity,row_identity; 888 PetscReal f; 889 890 PetscFunctionBegin; 891 f = info->fill; 892 levels = (PetscInt)info->levels; 893 diagonal_fill = (PetscInt)info->diagonal_fill; 894 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 895 896 /* special case that simply copies fill pattern */ 897 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 898 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 899 if (!levels && row_identity && col_identity) { 900 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 901 (*fact)->factor = FACTOR_LU; 902 b = (Mat_SeqAIJ*)(*fact)->data; 903 if (!b->diag) { 904 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 905 } 906 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 907 b->row = isrow; 908 b->col = iscol; 909 b->icol = isicol; 910 b->lu_damping = info->damping; 911 b->lu_zeropivot = info->zeropivot; 912 b->lu_shift = info->shift; 913 b->lu_shift_fraction= info->shift_fraction; 914 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 915 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 916 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 917 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 918 PetscFunctionReturn(0); 919 } 920 921 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 922 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 923 924 /* get new row pointers */ 925 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 926 ainew[0] = 0; 927 /* don't know how many column pointers are needed so estimate */ 928 jmax = (PetscInt)(f*(ai[n]+1)); 929 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 930 /* ajfill is level of fill for each fill entry */ 931 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr); 932 /* fill is a linked list of nonzeros in active row */ 933 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 934 /* im is level for each filled value */ 935 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 936 /* dloc is location of diagonal in factor */ 937 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr); 938 dloc[0] = 0; 939 for (prow=0; prow<n; prow++) { 940 941 /* copy current row into linked list */ 942 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 943 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 944 xi = aj + ai[r[prow]] ; 945 fill[n] = n; 946 fill[prow] = -1; /* marker to indicate if diagonal exists */ 947 while (nz--) { 948 fm = n; 949 idx = ic[*xi++ ]; 950 do { 951 m = fm; 952 fm = fill[m]; 953 } while (fm < idx); 954 fill[m] = idx; 955 fill[idx] = fm; 956 im[idx] = 0; 957 } 958 959 /* make sure diagonal entry is included */ 960 if (diagonal_fill && fill[prow] == -1) { 961 fm = n; 962 while (fill[fm] < prow) fm = fill[fm]; 963 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 964 fill[fm] = prow; 965 im[prow] = 0; 966 nzf++; 967 dcount++; 968 } 969 970 nzi = 0; 971 row = fill[n]; 972 while (row < prow) { 973 incrlev = im[row] + 1; 974 nz = dloc[row]; 975 xi = ajnew + ainew[row] + nz + 1; 976 flev = ajfill + ainew[row] + nz + 1; 977 nnz = ainew[row+1] - ainew[row] - nz - 1; 978 fm = row; 979 while (nnz-- > 0) { 980 idx = *xi++ ; 981 if (*flev + incrlev > levels) { 982 flev++; 983 continue; 984 } 985 do { 986 m = fm; 987 fm = fill[m]; 988 } while (fm < idx); 989 if (fm != idx) { 990 im[idx] = *flev + incrlev; 991 fill[m] = idx; 992 fill[idx] = fm; 993 fm = idx; 994 nzf++; 995 } else { 996 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 997 } 998 flev++; 999 } 1000 row = fill[row]; 1001 nzi++; 1002 } 1003 /* copy new filled row into permanent storage */ 1004 ainew[prow+1] = ainew[prow] + nzf; 1005 if (ainew[prow+1] > jmax) { 1006 1007 /* estimate how much additional space we will need */ 1008 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1009 /* just double the memory each time */ 1010 /* maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1011 PetscInt maxadd = jmax; 1012 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1013 jmax += maxadd; 1014 1015 /* allocate a longer ajnew and ajfill */ 1016 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1017 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1018 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1019 ajnew = xi; 1020 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1021 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1022 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1023 ajfill = xi; 1024 reallocs++; /* count how many times we realloc */ 1025 } 1026 xi = ajnew + ainew[prow] ; 1027 flev = ajfill + ainew[prow] ; 1028 dloc[prow] = nzi; 1029 fm = fill[n]; 1030 while (nzf--) { 1031 *xi++ = fm ; 1032 *flev++ = im[fm]; 1033 fm = fill[fm]; 1034 } 1035 /* make sure row has diagonal entry */ 1036 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1037 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1038 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1039 } 1040 } 1041 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1042 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1043 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1044 ierr = PetscFree(fill);CHKERRQ(ierr); 1045 ierr = PetscFree(im);CHKERRQ(ierr); 1046 1047 { 1048 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1049 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1050 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1051 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1052 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1053 if (diagonal_fill) { 1054 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1055 } 1056 } 1057 1058 /* put together the new matrix */ 1059 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1060 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1061 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1062 PetscLogObjectParent(*fact,isicol); 1063 b = (Mat_SeqAIJ*)(*fact)->data; 1064 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1065 b->singlemalloc = PETSC_FALSE; 1066 len = (ainew[n] )*sizeof(PetscScalar); 1067 /* the next line frees the default space generated by the Create() */ 1068 ierr = PetscFree(b->a);CHKERRQ(ierr); 1069 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1070 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1071 b->j = ajnew; 1072 b->i = ainew; 1073 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1074 b->diag = dloc; 1075 b->ilen = 0; 1076 b->imax = 0; 1077 b->row = isrow; 1078 b->col = iscol; 1079 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1080 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1081 b->icol = isicol; 1082 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1083 /* In b structure: Free imax, ilen, old a, old j. 1084 Allocate dloc, solve_work, new a, new j */ 1085 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar))); 1086 b->maxnz = b->nz = ainew[n] ; 1087 b->lu_damping = info->damping; 1088 b->lu_shift = info->shift; 1089 b->lu_shift_fraction = info->shift_fraction; 1090 b->lu_zeropivot = info->zeropivot; 1091 (*fact)->factor = FACTOR_LU; 1092 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1093 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1094 1095 (*fact)->info.factor_mallocs = reallocs; 1096 (*fact)->info.fill_ratio_given = f; 1097 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1103 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact) 1104 { 1105 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1106 PetscErrorCode ierr,(*f)(Mat,Mat*); 1107 1108 PetscFunctionBegin; 1109 if (!a->sbaijMat){ 1110 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1111 } 1112 ierr = PetscObjectQueryFunction((PetscObject) *fact,"MatCholeskyFactorNumeric",(void (**)(void))&f);CHKERRQ(ierr); 1113 ierr = (*f)(a->sbaijMat,fact);CHKERRQ(ierr); 1114 ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 1115 a->sbaijMat = PETSC_NULL; 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #include "petscbt.h" 1120 #include "src/mat/utils/freespace.h" 1121 #include "src/mat/impls/sbaij/seq/sbaij.h" 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1124 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1125 { 1126 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1127 PetscErrorCode ierr; 1128 PetscTruth perm_identity; 1129 Mat_SeqSBAIJ *b; 1130 PetscInt *rip,i,*ai = a->i,*aj = a->j; 1131 PetscInt reallocs=0; 1132 PetscInt jmin,jmax,nzk,k,j,*jl; 1133 PetscInt prow; 1134 PetscInt *il,nextprow; 1135 PetscReal fill = info->fill,levels = info->levels; 1136 PetscInt am=A->m,*ui; 1137 PetscInt nlnk,*lnk,*lnk_lvl,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1138 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1139 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1140 PetscBT lnkbt; 1141 1142 PetscFunctionBegin; 1143 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1144 if (!perm_identity){ 1145 SETERRQ(PETSC_ERR_SUP,"Non-identity permutation is not supported yet"); 1146 } 1147 1148 /* levels = 0: simply copies fill pattern */ 1149 if (!levels) { 1150 ierr = MatConvert(A,MATSEQSBAIJ,fact);CHKERRQ(ierr); /* don't need copy numerical values! */ 1151 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1152 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1153 1154 ierr = PetscObjectComposeFunction((PetscObject) *fact,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);CHKERRQ(ierr); 1155 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1156 PetscFunctionReturn(0); 1157 } 1158 1159 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1160 ai = a->i; aj = a->j; 1161 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1162 1163 /* initialization */ 1164 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1165 ui[0] = 0; 1166 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 1167 1168 /* jl: linked list for storing indices of the pivot rows 1169 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1170 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1171 il = jl + am; 1172 uj_ptr = (PetscInt**)(il + am); 1173 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1174 for (i=0; i<am; i++){ 1175 jl[i] = am; il[i] = 0; 1176 } 1177 1178 /* create and initialize a linked list for storing column indices of the active row k */ 1179 nlnk = am + 1; 1180 ierr = PetscLLCreate_PermutedLeveled(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1181 1182 /* initial FreeSpace size is fill*(ai[am]+1) */ 1183 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1184 current_space = free_space; 1185 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1186 current_space_lvl = free_space_lvl; 1187 1188 for (k=0; k<am; k++){ /* for each active row k */ 1189 /* initialize lnk by the column indices of row rip[k] of A */ 1190 nzk = 0; 1191 ncols = ai[rip[k]+1] - a->diag[rip[k]]; /* diag would change for !perm_identity */ 1192 cols = aj + a->diag[rip[k]]; 1193 for (j=0; j<ncols; j++) cols_lvl[j] = -1; /* initialize level for nonzero entries */ 1194 ierr = PetscLLAdd_PermutedLeveled(ncols,cols,levels,cols_lvl,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1195 nzk += nlnk; 1196 1197 /* update lnk by computing fill-in for each pivot row to be merged in */ 1198 prow = jl[k]; /* 1st pivot row */ 1199 1200 while (prow < k){ 1201 nextprow = jl[prow]; 1202 1203 /* merge prow into k-th row */ 1204 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1205 jmax = ui[prow+1]; 1206 ncols = jmax-jmin; 1207 cols = uj_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1208 /* ierr = PetscLLAdd(ncols,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); */ 1209 /* cols_lvl = uj_lvl_ptr[prow] + jmin - ui[prow]; */ 1210 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + jmin - ui[prow] + j); 1211 ierr = PetscLLAdd_PermutedLeveled(ncols,cols,levels,cols_lvl,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1212 nzk += nlnk; 1213 1214 /* update il and jl for prow */ 1215 if (jmin < jmax){ 1216 il[prow] = jmin; 1217 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1218 } 1219 prow = nextprow; 1220 } 1221 1222 /* if free space is not available, make more free space */ 1223 if (current_space->local_remaining<nzk) { 1224 i = am - k + 1; /* num of unfactored rows */ 1225 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1226 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1227 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1228 reallocs++; 1229 } 1230 1231 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1232 ierr = PetscLLClean_PermutedLeveled(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1233 1234 /* add the k-th row into il and jl */ 1235 if (nzk-1 > 0){ 1236 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1237 jl[k] = jl[i]; jl[i] = k; 1238 il[k] = ui[k] + 1; 1239 } 1240 uj_ptr[k] = current_space->array; 1241 uj_lvl_ptr[k] = current_space_lvl->array; 1242 1243 current_space->array += nzk; 1244 current_space->local_used += nzk; 1245 current_space->local_remaining -= nzk; 1246 1247 current_space_lvl->array += nzk; 1248 current_space_lvl->local_used += nzk; 1249 current_space_lvl->local_remaining -= nzk; 1250 1251 ui[k+1] = ui[k] + nzk; 1252 } 1253 1254 if (ai[am] != 0) { 1255 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1256 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1257 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1258 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1259 } else { 1260 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1261 } 1262 1263 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1264 ierr = PetscFree(jl);CHKERRQ(ierr); 1265 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1266 1267 /* destroy list of free space and other temporary array(s) */ 1268 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1269 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1270 ierr = PetscLLDestroy_PermutedLeveled(lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1271 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1272 1273 /* put together the new matrix in MATSEQSBAIJ format */ 1274 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1275 ierr = MatSetType(*fact,MATSEQSBAIJ);CHKERRQ(ierr); 1276 ierr = MatSeqSBAIJSetPreallocation(*fact,1,0,PETSC_NULL);CHKERRQ(ierr); 1277 1278 b = (Mat_SeqSBAIJ*)(*fact)->data; 1279 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1280 b->singlemalloc = PETSC_FALSE; 1281 /* the next line frees the default space generated by the Create() */ 1282 ierr = PetscFree(b->a);CHKERRQ(ierr); 1283 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1284 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1285 b->j = uj; 1286 b->i = ui; 1287 b->diag = 0; 1288 b->ilen = 0; 1289 b->imax = 0; 1290 b->row = perm; 1291 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1292 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1293 b->icol = perm; 1294 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1295 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1296 /* In b structure: Free imax, ilen, old a, old j. 1297 Allocate idnew, solve_work, new a, new j */ 1298 PetscLogObjectMemory(*fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 1299 b->maxnz = b->nz = ui[am]; 1300 1301 (*fact)->factor = FACTOR_CHOLESKY; 1302 (*fact)->info.factor_mallocs = reallocs; 1303 (*fact)->info.fill_ratio_given = fill; 1304 if (ai[am] != 0) { 1305 (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1306 } else { 1307 (*fact)->info.fill_ratio_needed = 0.0; 1308 } 1309 /* Should the followings be moved to MatCholeskyFactorNumeric_SeqAIJ()? */ 1310 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1311 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1312 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1313 1314 /* -------- end of new ---------------------------*/ 1315 ierr = PetscObjectComposeFunction((PetscObject) *fact,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);CHKERRQ(ierr); 1316 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1317 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1323 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1324 { 1325 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1326 Mat_SeqSBAIJ *b; 1327 PetscErrorCode ierr; 1328 PetscTruth perm_identity; 1329 PetscReal fill = info->fill; 1330 PetscInt *rip,i,am=A->m,*ai,*aj,reallocs=0,prow; 1331 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1332 PetscInt nlnk,*lnk,ncols,*cols,*uj,**uj_ptr; 1333 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1334 PetscBT lnkbt; 1335 1336 PetscFunctionBegin; 1337 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1338 if (!perm_identity){ 1339 SETERRQ(PETSC_ERR_SUP,"Non-identity permutation is not supported yet"); 1340 } 1341 1342 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1343 ai = a->i; aj = a->j; 1344 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1345 1346 /* initialization */ 1347 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1348 ui[0] = 0; 1349 1350 /* jl: linked list for storing indices of the pivot rows 1351 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1352 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1353 il = jl + am; 1354 uj_ptr = (PetscInt**)(il + am); 1355 for (i=0; i<am; i++){ 1356 jl[i] = am; il[i] = 0; 1357 } 1358 1359 /* create and initialize a linked list for storing column indices of the active row k */ 1360 nlnk = am + 1; 1361 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1362 1363 /* initial FreeSpace size is fill*(ai[am]+1) */ 1364 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1365 current_space = free_space; 1366 1367 for (k=0; k<am; k++){ /* for each active row k */ 1368 /* initialize lnk by the column indices of row rip[k] of A */ 1369 nzk = 0; 1370 ncols = ai[rip[k]+1] - a->diag[rip[k]]; /* diag would change for !perm_identity */ 1371 cols = aj + a->diag[rip[k]]; 1372 ierr = PetscLLAdd(ncols,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1373 nzk += nlnk; 1374 1375 /* update lnk by computing fill-in for each pivot row to be merged in */ 1376 prow = jl[k]; /* 1st pivot row */ 1377 1378 while (prow < k){ 1379 nextprow = jl[prow]; 1380 1381 /* merge prow into k-th row */ 1382 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1383 jmax = ui[prow+1]; 1384 ncols = jmax-jmin; 1385 cols = uj_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1386 ierr = PetscLLAdd(ncols,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1387 nzk += nlnk; 1388 1389 /* update il and jl for prow */ 1390 if (jmin < jmax){ 1391 il[prow] = jmin; 1392 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1393 } 1394 prow = nextprow; 1395 } 1396 1397 /* if free space is not available, make more free space */ 1398 if (current_space->local_remaining<nzk) { 1399 i = am - k + 1; /* num of unfactored rows */ 1400 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1401 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1402 reallocs++; 1403 } 1404 1405 /* copy data into free space, then initialize lnk */ 1406 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1407 1408 /* add the k-th row into il and jl */ 1409 if (nzk-1 > 0){ 1410 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1411 jl[k] = jl[i]; jl[i] = k; 1412 il[k] = ui[k] + 1; 1413 } 1414 uj_ptr[k] = current_space->array; 1415 current_space->array += nzk; 1416 current_space->local_used += nzk; 1417 current_space->local_remaining -= nzk; 1418 1419 ui[k+1] = ui[k] + nzk; 1420 } 1421 1422 if (ai[am] != 0) { 1423 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1424 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1425 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1426 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1427 } else { 1428 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1429 } 1430 1431 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1432 ierr = PetscFree(jl);CHKERRQ(ierr); 1433 1434 /* destroy list of free space and other temporary array(s) */ 1435 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1436 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1437 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1438 1439 /* put together the new matrix in MATSEQSBAIJ format */ 1440 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1441 ierr = MatSetType(*fact,MATSEQSBAIJ);CHKERRQ(ierr); 1442 ierr = MatSeqSBAIJSetPreallocation(*fact,1,0,PETSC_NULL);CHKERRQ(ierr); 1443 1444 b = (Mat_SeqSBAIJ*)(*fact)->data; 1445 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1446 b->singlemalloc = PETSC_FALSE; 1447 /* the next line frees the default space generated by the Create() */ 1448 ierr = PetscFree(b->a);CHKERRQ(ierr); 1449 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1450 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1451 b->j = uj; 1452 b->i = ui; 1453 b->diag = 0; 1454 b->ilen = 0; 1455 b->imax = 0; 1456 b->row = perm; 1457 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1458 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1459 b->icol = perm; 1460 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1461 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1462 /* In b structure: Free imax, ilen, old a, old j. 1463 Allocate idnew, solve_work, new a, new j */ 1464 PetscLogObjectMemory(*fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 1465 b->maxnz = b->nz = ui[am]; 1466 1467 (*fact)->factor = FACTOR_CHOLESKY; 1468 (*fact)->info.factor_mallocs = reallocs; 1469 (*fact)->info.fill_ratio_given = fill; 1470 if (ai[am] != 0) { 1471 (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1472 } else { 1473 (*fact)->info.fill_ratio_needed = 0.0; 1474 } 1475 /* Should the followings be moved to MatCholeskyFactorNumeric_SeqAIJ()? */ 1476 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1477 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1478 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1479 1480 /* -------- end of new ---------------------------*/ 1481 ierr = PetscObjectComposeFunction((PetscObject) *fact,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);CHKERRQ(ierr); 1482 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1483 1484 PetscFunctionReturn(0); 1485 } 1486