1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 9 { 10 PetscFunctionBegin; 11 12 SETERRQ(PETSC_ERR_SUP,"Code not written"); 13 #if !defined(PETSC_USE_DEBUG) 14 PetscFunctionReturn(0); 15 #endif 16 } 17 18 19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 21 22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 28 /* ------------------------------------------------------------ 29 30 This interface was contribed by Tony Caola 31 32 This routine is an interface to the pivoting drop-tolerance 33 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 34 SPARSEKIT2. 35 36 The SPARSEKIT2 routines used here are covered by the GNU 37 copyright; see the file gnu in this directory. 38 39 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 40 help in getting this routine ironed out. 41 42 The major drawback to this routine is that if info->fill is 43 not large enough it fails rather than allocating more space; 44 this can be fixed by hacking/improving the f2c version of 45 Yousef Saad's code. 46 47 ------------------------------------------------------------ 48 */ 49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 50 { 51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 52 PetscFunctionBegin; 53 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 54 You can obtain the drop tolerance routines by installing PETSc from\n\ 55 www.mcs.anl.gov/petsc\n"); 56 #else 57 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 58 IS iscolf,isicol,isirow; 59 PetscTruth reorder; 60 PetscErrorCode ierr,sierr; 61 PetscInt *c,*r,*ic,i,n = A->m; 62 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63 PetscInt *ordcol,*iwk,*iperm,*jw; 64 PetscInt jmax,lfill,job,*o_i,*o_j; 65 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66 PetscReal af; 67 68 PetscFunctionBegin; 69 70 if (info->dt == PETSC_DEFAULT) info->dt = .005; 71 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 72 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 73 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 74 lfill = (PetscInt)(info->dtcount/2.0); 75 jmax = (PetscInt)(info->fill*a->nz); 76 77 78 /* ------------------------------------------------------------ 79 If reorder=.TRUE., then the original matrix has to be 80 reordered to reflect the user selected ordering scheme, and 81 then de-reordered so it is in it's original format. 82 Because Saad's dperm() is NOT in place, we have to copy 83 the original matrix and allocate more storage. . . 84 ------------------------------------------------------------ 85 */ 86 87 /* set reorder to true if either isrow or iscol is not identity */ 88 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 89 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 90 reorder = PetscNot(reorder); 91 92 93 /* storage for ilu factor */ 94 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 95 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 97 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 for (i=old_i[0];i<old_i[n];i++) { 104 old_j[i]++; 105 } 106 for(i=0;i<n+1;i++) { 107 old_i[i]++; 108 }; 109 110 111 if (reorder) { 112 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 113 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 114 for(i=0;i<n;i++) { 115 r[i] = r[i]+1; 116 c[i] = c[i]+1; 117 } 118 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 119 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 121 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 122 for (i=0;i<n;i++) { 123 r[i] = r[i]-1; 124 c[i] = c[i]-1; 125 } 126 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 127 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 128 o_a = old_a2; 129 o_j = old_j2; 130 o_i = old_i2; 131 } else { 132 o_a = old_a; 133 o_j = old_j; 134 o_i = old_i; 135 } 136 137 /* ------------------------------------------------------------ 138 Call Saad's ilutp() routine to generate the factorization 139 ------------------------------------------------------------ 140 */ 141 142 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 144 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 145 146 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 147 if (sierr) { 148 switch (sierr) { 149 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 150 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 152 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 153 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 154 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 155 } 156 } 157 158 ierr = PetscFree(w);CHKERRQ(ierr); 159 ierr = PetscFree(jw);CHKERRQ(ierr); 160 161 /* ------------------------------------------------------------ 162 Saad's routine gives the result in Modified Sparse Row (msr) 163 Convert to Compressed Sparse Row format (csr) 164 ------------------------------------------------------------ 165 */ 166 167 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 168 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 169 170 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 171 172 ierr = PetscFree(iwk);CHKERRQ(ierr); 173 ierr = PetscFree(wk);CHKERRQ(ierr); 174 175 if (reorder) { 176 ierr = PetscFree(old_a2);CHKERRQ(ierr); 177 ierr = PetscFree(old_j2);CHKERRQ(ierr); 178 ierr = PetscFree(old_i2);CHKERRQ(ierr); 179 } else { 180 /* fix permutation of old_j that the factorization introduced */ 181 for (i=old_i[0]; i<old_i[n]; i++) { 182 old_j[i-1] = iperm[old_j[i-1]-1]; 183 } 184 } 185 186 /* get rid of the shift to indices starting at 1 */ 187 for (i=0; i<n+1; i++) { 188 old_i[i]--; 189 } 190 for (i=old_i[0];i<old_i[n];i++) { 191 old_j[i]--; 192 } 193 194 /* Make the factored matrix 0-based */ 195 for (i=0; i<n+1; i++) { 196 new_i[i]--; 197 } 198 for (i=new_i[0];i<new_i[n];i++) { 199 new_j[i]--; 200 } 201 202 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 203 /*-- permute the right-hand-side and solution vectors --*/ 204 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 205 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 206 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 207 for(i=0; i<n; i++) { 208 ordcol[i] = ic[iperm[i]-1]; 209 }; 210 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 211 ierr = ISDestroy(isicol);CHKERRQ(ierr); 212 213 ierr = PetscFree(iperm);CHKERRQ(ierr); 214 215 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 216 ierr = PetscFree(ordcol);CHKERRQ(ierr); 217 218 /*----- put together the new matrix -----*/ 219 220 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 221 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 222 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 223 (*fact)->factor = FACTOR_LU; 224 (*fact)->assembled = PETSC_TRUE; 225 226 b = (Mat_SeqAIJ*)(*fact)->data; 227 ierr = PetscFree(b->imax);CHKERRQ(ierr); 228 b->sorted = PETSC_FALSE; 229 b->singlemalloc = PETSC_FALSE; 230 /* the next line frees the default space generated by the MatCreate() */ 231 ierr = PetscFree(b->a);CHKERRQ(ierr); 232 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 247 248 /* check out for identical nodes. If found, use inode functions */ 249 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 250 251 af = ((double)b->nz)/((double)a->nz) + .001; 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 /* 262 Factorization code for AIJ format. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 267 { 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 269 IS isicol; 270 PetscErrorCode ierr; 271 PetscInt *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j; 272 PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 273 PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im; 274 PetscReal f; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 284 ainew[0] = 0; 285 /* don't know how many column pointers are needed so estimate */ 286 f = info->fill; 287 jmax = (PetscInt)(f*ai[n]+1); 288 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 289 /* fill is a linked list of nonzeros in active row */ 290 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 291 im = fill + n + 1; 292 /* idnew is location of diagonal in factor */ 293 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr); 294 idnew[0] = 0; 295 296 for (i=0; i<n; i++) { 297 /* first copy previous fill into linked list */ 298 nnz = nz = ai[r[i]+1] - ai[r[i]]; 299 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 300 ajtmp = aj + ai[r[i]]; 301 fill[n] = n; 302 while (nz--) { 303 fm = n; 304 idx = ic[*ajtmp++]; 305 do { 306 m = fm; 307 fm = fill[m]; 308 } while (fm < idx); 309 fill[m] = idx; 310 fill[idx] = fm; 311 } 312 row = fill[n]; 313 while (row < i) { 314 ajtmp = ajnew + idnew[row] + 1; 315 nzbd = 1 + idnew[row] - ainew[row]; 316 nz = im[row] - nzbd; 317 fm = row; 318 while (nz-- > 0) { 319 idx = *ajtmp++ ; 320 nzbd++; 321 if (idx == i) im[row] = nzbd; 322 do { 323 m = fm; 324 fm = fill[m]; 325 } while (fm < idx); 326 if (fm != idx) { 327 fill[m] = idx; 328 fill[idx] = fm; 329 fm = idx; 330 nnz++; 331 } 332 } 333 row = fill[row]; 334 } 335 /* copy new filled row into permanent storage */ 336 ainew[i+1] = ainew[i] + nnz; 337 if (ainew[i+1] > jmax) { 338 339 /* estimate how much additional space we will need */ 340 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 341 /* just double the memory each time */ 342 PetscInt maxadd = jmax; 343 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 344 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 345 jmax += maxadd; 346 347 /* allocate a longer ajnew */ 348 ierr = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 349 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr); 350 ierr = PetscFree(ajnew);CHKERRQ(ierr); 351 ajnew = ajtmp; 352 reallocs++; /* count how many times we realloc */ 353 } 354 ajtmp = ajnew + ainew[i]; 355 fm = fill[n]; 356 nzi = 0; 357 im[i] = nnz; 358 while (nnz--) { 359 if (fm < i) nzi++; 360 *ajtmp++ = fm ; 361 fm = fill[fm]; 362 } 363 idnew[i] = ainew[i] + nzi; 364 } 365 if (ai[n] != 0) { 366 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 370 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 371 } else { 372 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 373 } 374 375 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 376 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 377 378 ierr = PetscFree(fill);CHKERRQ(ierr); 379 380 /* put together the new matrix */ 381 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 382 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 383 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 384 PetscLogObjectParent(*B,isicol); 385 b = (Mat_SeqAIJ*)(*B)->data; 386 ierr = PetscFree(b->imax);CHKERRQ(ierr); 387 b->singlemalloc = PETSC_FALSE; 388 /* the next line frees the default space generated by the Create() */ 389 ierr = PetscFree(b->a);CHKERRQ(ierr); 390 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 391 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 392 b->j = ajnew; 393 b->i = ainew; 394 b->diag = idnew; 395 b->ilen = 0; 396 b->imax = 0; 397 b->row = isrow; 398 b->col = iscol; 399 b->lu_damping = info->damping; 400 b->lu_zeropivot = info->zeropivot; 401 b->lu_shift = info->shift; 402 b->lu_shift_fraction = info->shift_fraction; 403 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 404 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 405 b->icol = isicol; 406 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 407 /* In b structure: Free imax, ilen, old a, old j. 408 Allocate idnew, solve_work, new a, new j */ 409 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar))); 410 b->maxnz = b->nz = ainew[n] ; 411 412 (*B)->factor = FACTOR_LU; 413 (*B)->info.factor_mallocs = reallocs; 414 (*B)->info.fill_ratio_given = f; 415 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 416 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 417 418 if (ai[n] != 0) { 419 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 420 } else { 421 (*B)->info.fill_ratio_needed = 0.0; 422 } 423 PetscFunctionReturn(0); 424 } 425 /* ----------------------------------------------------------- */ 426 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 430 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 431 { 432 Mat C = *B; 433 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 434 IS isrow = b->row,isicol = b->icol; 435 PetscErrorCode ierr; 436 PetscInt *r,*ic,i,j,n = A->m,*ai = b->i,*aj = b->j; 437 PetscInt *ajtmpold,*ajtmp,nz,row,*ics; 438 PetscInt *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0; 439 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 440 PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d; 441 PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.; 442 PetscTruth damp,lushift; 443 444 PetscFunctionBegin; 445 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 446 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 447 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 448 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 449 rtmps = rtmp; ics = ic; 450 451 if (!a->diag) { 452 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 453 } 454 455 if (b->lu_shift) { /* set max shift */ 456 PetscInt *aai = a->i,*ddiag = a->diag; 457 shift_top = 0; 458 for (i=0; i<n; i++) { 459 d = PetscAbsScalar((a->a)[ddiag[i]]); 460 /* calculate amt of shift needed for this row */ 461 if (d<=0) { 462 row_shift = 0; 463 } else { 464 row_shift = -2*d; 465 } 466 v = a->a+aai[i]; 467 for (j=0; j<aai[i+1]-aai[i]; j++) 468 row_shift += PetscAbsScalar(v[j]); 469 if (row_shift>shift_top) shift_top = row_shift; 470 } 471 } 472 473 shift_fraction = 0; shift_amount = 0; 474 do { 475 damp = PETSC_FALSE; 476 lushift = PETSC_FALSE; 477 for (i=0; i<n; i++) { 478 nz = ai[i+1] - ai[i]; 479 ajtmp = aj + ai[i]; 480 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 481 482 /* load in initial (unfactored row) */ 483 nz = a->i[r[i]+1] - a->i[r[i]]; 484 ajtmpold = a->j + a->i[r[i]]; 485 v = a->a + a->i[r[i]]; 486 for (j=0; j<nz; j++) { 487 rtmp[ics[ajtmpold[j]]] = v[j]; 488 } 489 rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */ 490 491 row = *ajtmp++ ; 492 while (row < i) { 493 pc = rtmp + row; 494 if (*pc != 0.0) { 495 pv = b->a + diag_offset[row]; 496 pj = b->j + diag_offset[row] + 1; 497 multiplier = *pc / *pv++; 498 *pc = multiplier; 499 nz = ai[row+1] - diag_offset[row] - 1; 500 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 501 PetscLogFlops(2*nz); 502 } 503 row = *ajtmp++; 504 } 505 /* finished row so stick it into b->a */ 506 pv = b->a + ai[i] ; 507 pj = b->j + ai[i] ; 508 nz = ai[i+1] - ai[i]; 509 diag = diag_offset[i] - ai[i]; 510 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 511 rs = 0.0; 512 for (j=0; j<nz; j++) { 513 pv[j] = rtmps[pj[j]]; 514 if (j != diag) rs += PetscAbsScalar(pv[j]); 515 } 516 #define MAX_NSHIFT 5 517 if (PetscRealPart(pv[diag]) <= zeropivot*rs && b->lu_shift) { 518 if (nshift>MAX_NSHIFT) { 519 SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner"); 520 } else if (nshift==MAX_NSHIFT) { 521 shift_fraction = shift_hi; 522 lushift = PETSC_FALSE; 523 } else { 524 shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 525 lushift = PETSC_TRUE; 526 } 527 shift_amount = shift_fraction * shift_top; 528 nshift++; 529 break; 530 } 531 if (PetscAbsScalar(pv[diag]) <= zeropivot*rs) { 532 if (damping) { 533 if (ndamp) damping *= 2.0; 534 damp = PETSC_TRUE; 535 ndamp++; 536 break; 537 } else { 538 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 539 } 540 } 541 } 542 if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) { 543 /* 544 * if no shift in this attempt & shifting & started shifting & can refine, 545 * then try lower shift 546 */ 547 shift_hi = shift_fraction; 548 shift_fraction = (shift_hi+shift_lo)/2.; 549 shift_amount = shift_fraction * shift_top; 550 lushift = PETSC_TRUE; 551 nshift++; 552 } 553 } while (damp || lushift); 554 555 /* invert diagonal entries for simplier triangular solves */ 556 for (i=0; i<n; i++) { 557 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 558 } 559 560 ierr = PetscFree(rtmp);CHKERRQ(ierr); 561 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 562 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 563 C->factor = FACTOR_LU; 564 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 565 C->assembled = PETSC_TRUE; 566 PetscLogFlops(C->n); 567 if (ndamp) { 568 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %D damping value %g\n",ndamp,damping); 569 } 570 if (nshift) { 571 b->lu_shift_fraction = shift_fraction; 572 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift); 573 } 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 579 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 580 { 581 PetscFunctionBegin; 582 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 583 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 584 PetscFunctionReturn(0); 585 } 586 587 588 /* ----------------------------------------------------------- */ 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatLUFactor_SeqAIJ" 591 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 592 { 593 PetscErrorCode ierr; 594 Mat C; 595 596 PetscFunctionBegin; 597 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 598 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 599 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 600 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 601 PetscFunctionReturn(0); 602 } 603 /* ----------------------------------------------------------- */ 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatSolve_SeqAIJ" 606 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 607 { 608 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 609 IS iscol = a->col,isrow = a->row; 610 PetscErrorCode ierr; 611 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 612 PetscInt nz,*rout,*cout; 613 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 614 615 PetscFunctionBegin; 616 if (!n) PetscFunctionReturn(0); 617 618 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 619 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 620 tmp = a->solve_work; 621 622 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 623 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 624 625 /* forward solve the lower triangular */ 626 tmp[0] = b[*r++]; 627 tmps = tmp; 628 for (i=1; i<n; i++) { 629 v = aa + ai[i] ; 630 vi = aj + ai[i] ; 631 nz = a->diag[i] - ai[i]; 632 sum = b[*r++]; 633 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 634 tmp[i] = sum; 635 } 636 637 /* backward solve the upper triangular */ 638 for (i=n-1; i>=0; i--){ 639 v = aa + a->diag[i] + 1; 640 vi = aj + a->diag[i] + 1; 641 nz = ai[i+1] - a->diag[i] - 1; 642 sum = tmp[i]; 643 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 644 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 645 } 646 647 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 648 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 649 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 650 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 651 PetscLogFlops(2*a->nz - A->n); 652 PetscFunctionReturn(0); 653 } 654 655 /* ----------------------------------------------------------- */ 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 658 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 659 { 660 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 661 PetscErrorCode ierr; 662 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 663 PetscScalar *x,*b,*aa = a->a; 664 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 665 PetscInt adiag_i,i,*vi,nz,ai_i; 666 PetscScalar *v,sum; 667 #endif 668 669 PetscFunctionBegin; 670 if (!n) PetscFunctionReturn(0); 671 672 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 673 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 674 675 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 676 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 677 #else 678 /* forward solve the lower triangular */ 679 x[0] = b[0]; 680 for (i=1; i<n; i++) { 681 ai_i = ai[i]; 682 v = aa + ai_i; 683 vi = aj + ai_i; 684 nz = adiag[i] - ai_i; 685 sum = b[i]; 686 while (nz--) sum -= *v++ * x[*vi++]; 687 x[i] = sum; 688 } 689 690 /* backward solve the upper triangular */ 691 for (i=n-1; i>=0; i--){ 692 adiag_i = adiag[i]; 693 v = aa + adiag_i + 1; 694 vi = aj + adiag_i + 1; 695 nz = ai[i+1] - adiag_i - 1; 696 sum = x[i]; 697 while (nz--) sum -= *v++ * x[*vi++]; 698 x[i] = sum*aa[adiag_i]; 699 } 700 #endif 701 PetscLogFlops(2*a->nz - A->n); 702 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 703 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 709 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 710 { 711 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 712 IS iscol = a->col,isrow = a->row; 713 PetscErrorCode ierr; 714 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 715 PetscInt nz,*rout,*cout; 716 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 717 718 PetscFunctionBegin; 719 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 720 721 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 722 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 723 tmp = a->solve_work; 724 725 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 726 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 727 728 /* forward solve the lower triangular */ 729 tmp[0] = b[*r++]; 730 for (i=1; i<n; i++) { 731 v = aa + ai[i] ; 732 vi = aj + ai[i] ; 733 nz = a->diag[i] - ai[i]; 734 sum = b[*r++]; 735 while (nz--) sum -= *v++ * tmp[*vi++ ]; 736 tmp[i] = sum; 737 } 738 739 /* backward solve the upper triangular */ 740 for (i=n-1; i>=0; i--){ 741 v = aa + a->diag[i] + 1; 742 vi = aj + a->diag[i] + 1; 743 nz = ai[i+1] - a->diag[i] - 1; 744 sum = tmp[i]; 745 while (nz--) sum -= *v++ * tmp[*vi++ ]; 746 tmp[i] = sum*aa[a->diag[i]]; 747 x[*c--] += tmp[i]; 748 } 749 750 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 751 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 752 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 753 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 754 PetscLogFlops(2*a->nz); 755 756 PetscFunctionReturn(0); 757 } 758 /* -------------------------------------------------------------------*/ 759 #undef __FUNCT__ 760 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 761 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 762 { 763 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 764 IS iscol = a->col,isrow = a->row; 765 PetscErrorCode ierr; 766 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 767 PetscInt nz,*rout,*cout,*diag = a->diag; 768 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 769 770 PetscFunctionBegin; 771 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 772 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 773 tmp = a->solve_work; 774 775 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 776 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 777 778 /* copy the b into temp work space according to permutation */ 779 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 780 781 /* forward solve the U^T */ 782 for (i=0; i<n; i++) { 783 v = aa + diag[i] ; 784 vi = aj + diag[i] + 1; 785 nz = ai[i+1] - diag[i] - 1; 786 s1 = tmp[i]; 787 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 788 while (nz--) { 789 tmp[*vi++ ] -= (*v++)*s1; 790 } 791 tmp[i] = s1; 792 } 793 794 /* backward solve the L^T */ 795 for (i=n-1; i>=0; i--){ 796 v = aa + diag[i] - 1 ; 797 vi = aj + diag[i] - 1 ; 798 nz = diag[i] - ai[i]; 799 s1 = tmp[i]; 800 while (nz--) { 801 tmp[*vi-- ] -= (*v--)*s1; 802 } 803 } 804 805 /* copy tmp into x according to permutation */ 806 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 807 808 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 809 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 810 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 811 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 812 813 PetscLogFlops(2*a->nz-A->n); 814 PetscFunctionReturn(0); 815 } 816 817 #undef __FUNCT__ 818 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 819 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 820 { 821 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 822 IS iscol = a->col,isrow = a->row; 823 PetscErrorCode ierr; 824 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 825 PetscInt nz,*rout,*cout,*diag = a->diag; 826 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 827 828 PetscFunctionBegin; 829 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 830 831 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 832 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 833 tmp = a->solve_work; 834 835 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 836 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 837 838 /* copy the b into temp work space according to permutation */ 839 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 840 841 /* forward solve the U^T */ 842 for (i=0; i<n; i++) { 843 v = aa + diag[i] ; 844 vi = aj + diag[i] + 1; 845 nz = ai[i+1] - diag[i] - 1; 846 tmp[i] *= *v++; 847 while (nz--) { 848 tmp[*vi++ ] -= (*v++)*tmp[i]; 849 } 850 } 851 852 /* backward solve the L^T */ 853 for (i=n-1; i>=0; i--){ 854 v = aa + diag[i] - 1 ; 855 vi = aj + diag[i] - 1 ; 856 nz = diag[i] - ai[i]; 857 while (nz--) { 858 tmp[*vi-- ] -= (*v--)*tmp[i]; 859 } 860 } 861 862 /* copy tmp into x according to permutation */ 863 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 864 865 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 866 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 867 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 868 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 869 870 PetscLogFlops(2*a->nz); 871 PetscFunctionReturn(0); 872 } 873 /* ----------------------------------------------------------------*/ 874 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 875 876 #undef __FUNCT__ 877 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 878 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 879 { 880 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 881 IS isicol; 882 PetscErrorCode ierr; 883 PetscInt *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j; 884 PetscInt *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 885 PetscInt *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0; 886 PetscInt incrlev,nnz,i,levels,diagonal_fill; 887 PetscTruth col_identity,row_identity; 888 PetscReal f; 889 890 PetscFunctionBegin; 891 f = info->fill; 892 levels = (PetscInt)info->levels; 893 diagonal_fill = (PetscInt)info->diagonal_fill; 894 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 895 896 /* special case that simply copies fill pattern */ 897 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 898 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 899 if (!levels && row_identity && col_identity) { 900 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 901 (*fact)->factor = FACTOR_LU; 902 b = (Mat_SeqAIJ*)(*fact)->data; 903 if (!b->diag) { 904 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 905 } 906 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 907 b->row = isrow; 908 b->col = iscol; 909 b->icol = isicol; 910 b->lu_damping = info->damping; 911 b->lu_zeropivot = info->zeropivot; 912 b->lu_shift = info->shift; 913 b->lu_shift_fraction= info->shift_fraction; 914 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 915 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 916 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 917 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 918 PetscFunctionReturn(0); 919 } 920 921 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 922 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 923 924 /* get new row pointers */ 925 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 926 ainew[0] = 0; 927 /* don't know how many column pointers are needed so estimate */ 928 jmax = (PetscInt)(f*(ai[n]+1)); 929 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 930 /* ajfill is level of fill for each fill entry */ 931 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr); 932 /* fill is a linked list of nonzeros in active row */ 933 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 934 /* im is level for each filled value */ 935 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 936 /* dloc is location of diagonal in factor */ 937 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr); 938 dloc[0] = 0; 939 for (prow=0; prow<n; prow++) { 940 941 /* copy current row into linked list */ 942 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 943 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 944 xi = aj + ai[r[prow]] ; 945 fill[n] = n; 946 fill[prow] = -1; /* marker to indicate if diagonal exists */ 947 while (nz--) { 948 fm = n; 949 idx = ic[*xi++ ]; 950 do { 951 m = fm; 952 fm = fill[m]; 953 } while (fm < idx); 954 fill[m] = idx; 955 fill[idx] = fm; 956 im[idx] = 0; 957 } 958 959 /* make sure diagonal entry is included */ 960 if (diagonal_fill && fill[prow] == -1) { 961 fm = n; 962 while (fill[fm] < prow) fm = fill[fm]; 963 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 964 fill[fm] = prow; 965 im[prow] = 0; 966 nzf++; 967 dcount++; 968 } 969 970 nzi = 0; 971 row = fill[n]; 972 while (row < prow) { 973 incrlev = im[row] + 1; 974 nz = dloc[row]; 975 xi = ajnew + ainew[row] + nz + 1; 976 flev = ajfill + ainew[row] + nz + 1; 977 nnz = ainew[row+1] - ainew[row] - nz - 1; 978 fm = row; 979 while (nnz-- > 0) { 980 idx = *xi++ ; 981 if (*flev + incrlev > levels) { 982 flev++; 983 continue; 984 } 985 do { 986 m = fm; 987 fm = fill[m]; 988 } while (fm < idx); 989 if (fm != idx) { 990 im[idx] = *flev + incrlev; 991 fill[m] = idx; 992 fill[idx] = fm; 993 fm = idx; 994 nzf++; 995 } else { 996 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 997 } 998 flev++; 999 } 1000 row = fill[row]; 1001 nzi++; 1002 } 1003 /* copy new filled row into permanent storage */ 1004 ainew[prow+1] = ainew[prow] + nzf; 1005 if (ainew[prow+1] > jmax) { 1006 1007 /* estimate how much additional space we will need */ 1008 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1009 /* just double the memory each time */ 1010 /* maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1011 PetscInt maxadd = jmax; 1012 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1013 jmax += maxadd; 1014 1015 /* allocate a longer ajnew and ajfill */ 1016 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1017 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1018 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1019 ajnew = xi; 1020 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1021 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1022 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1023 ajfill = xi; 1024 reallocs++; /* count how many times we realloc */ 1025 } 1026 xi = ajnew + ainew[prow] ; 1027 flev = ajfill + ainew[prow] ; 1028 dloc[prow] = nzi; 1029 fm = fill[n]; 1030 while (nzf--) { 1031 *xi++ = fm ; 1032 *flev++ = im[fm]; 1033 fm = fill[fm]; 1034 } 1035 /* make sure row has diagonal entry */ 1036 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1037 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1038 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1039 } 1040 } 1041 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1042 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1043 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1044 ierr = PetscFree(fill);CHKERRQ(ierr); 1045 ierr = PetscFree(im);CHKERRQ(ierr); 1046 1047 { 1048 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1049 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1050 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1051 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1052 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1053 if (diagonal_fill) { 1054 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1055 } 1056 } 1057 1058 /* put together the new matrix */ 1059 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1060 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1061 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1062 PetscLogObjectParent(*fact,isicol); 1063 b = (Mat_SeqAIJ*)(*fact)->data; 1064 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1065 b->singlemalloc = PETSC_FALSE; 1066 len = (ainew[n] )*sizeof(PetscScalar); 1067 /* the next line frees the default space generated by the Create() */ 1068 ierr = PetscFree(b->a);CHKERRQ(ierr); 1069 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1070 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1071 b->j = ajnew; 1072 b->i = ainew; 1073 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1074 b->diag = dloc; 1075 b->ilen = 0; 1076 b->imax = 0; 1077 b->row = isrow; 1078 b->col = iscol; 1079 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1080 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1081 b->icol = isicol; 1082 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1083 /* In b structure: Free imax, ilen, old a, old j. 1084 Allocate dloc, solve_work, new a, new j */ 1085 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar))); 1086 b->maxnz = b->nz = ainew[n] ; 1087 b->lu_damping = info->damping; 1088 b->lu_shift = info->shift; 1089 b->lu_shift_fraction = info->shift_fraction; 1090 b->lu_zeropivot = info->zeropivot; 1091 (*fact)->factor = FACTOR_LU; 1092 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1093 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1094 1095 (*fact)->info.factor_mallocs = reallocs; 1096 (*fact)->info.fill_ratio_given = f; 1097 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1103 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact) 1104 { 1105 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1106 PetscErrorCode ierr,(*f)(Mat,Mat*); 1107 1108 PetscFunctionBegin; 1109 if (!a->sbaijMat){ 1110 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1111 } 1112 ierr = PetscObjectQueryFunction((PetscObject) *fact,"MatCholeskyFactorNumeric",(void (**)(void))&f);CHKERRQ(ierr); 1113 ierr = (*f)(a->sbaijMat,fact);CHKERRQ(ierr); 1114 ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 1115 a->sbaijMat = PETSC_NULL; 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #include "petscbt.h" 1120 #include "src/mat/utils/freespace.h" 1121 #include "src/mat/impls/sbaij/seq/sbaij.h" 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1124 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1125 { 1126 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1127 Mat_SeqSBAIJ *b; 1128 Mat B; 1129 PetscErrorCode ierr; 1130 PetscTruth perm_identity; 1131 PetscInt reallocs=0,*rip,i,*ai,*aj,am=A->m,*ui; 1132 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1133 PetscInt nlnk,*lnk,*lnk_lvl,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1134 PetscReal fill=info->fill,levels=info->levels; 1135 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1136 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1137 PetscBT lnkbt; 1138 1139 PetscFunctionBegin; 1140 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1141 if (!perm_identity){ 1142 SETERRQ(PETSC_ERR_SUP,"Non-identity permutation is not supported yet"); 1143 } 1144 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1145 ai = a->i; aj = a->j; 1146 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1147 1148 /* levels = 0: simply copies fill pattern */ 1149 if (!levels) { 1150 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1151 for (i=0; i<am; i++) { 1152 ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */ 1153 } 1154 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1155 B = *fact; 1156 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1157 ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr); 1158 1159 b = (Mat_SeqSBAIJ*)B->data; 1160 uj = b->j; 1161 for (i=0; i<am; i++) { 1162 aj = a->j + a->diag[i]; 1163 for (j=0; j<ui[i]; j++){ 1164 *uj++ = *aj++; 1165 } 1166 b->ilen[i] = ui[i]; 1167 } 1168 ierr = PetscFree(ui);CHKERRQ(ierr); 1169 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1170 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1171 1172 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1173 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1174 1175 ierr = PetscObjectComposeFunction((PetscObject)B,"MatICCFactorNumeric","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 1176 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1177 PetscFunctionReturn(0); 1178 } 1179 1180 /* initialization */ 1181 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1182 ui[0] = 0; 1183 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 1184 1185 /* jl: linked list for storing indices of the pivot rows 1186 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1187 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1188 il = jl + am; 1189 uj_ptr = (PetscInt**)(il + am); 1190 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1191 for (i=0; i<am; i++){ 1192 jl[i] = am; il[i] = 0; 1193 } 1194 1195 /* create and initialize a linked list for storing column indices of the active row k */ 1196 nlnk = am + 1; 1197 ierr = PetscLLCreate_PermutedLeveled(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1198 1199 /* initial FreeSpace size is fill*(ai[am]+1) */ 1200 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1201 current_space = free_space; 1202 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1203 current_space_lvl = free_space_lvl; 1204 1205 for (k=0; k<am; k++){ /* for each active row k */ 1206 /* initialize lnk by the column indices of row rip[k] of A */ 1207 nzk = 0; 1208 ncols = ai[rip[k]+1] - a->diag[rip[k]]; /* changes for !perm_identity */ 1209 cols = aj + a->diag[rip[k]]; 1210 for (j=0; j<ncols; j++) cols_lvl[j] = -1; /* initialize level for nonzero entries */ 1211 ierr = PetscLLAdd_PermutedLeveled(ncols,cols,levels,cols_lvl,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1212 nzk += nlnk; 1213 1214 /* update lnk by computing fill-in for each pivot row to be merged in */ 1215 prow = jl[k]; /* 1st pivot row */ 1216 1217 while (prow < k){ 1218 nextprow = jl[prow]; 1219 1220 /* merge prow into k-th row */ 1221 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1222 jmax = ui[prow+1]; 1223 ncols = jmax-jmin; 1224 i = jmin - ui[prow]; 1225 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1226 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1227 ierr = PetscLLAdd_PermutedLeveled(ncols,cols,levels,cols_lvl,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1228 nzk += nlnk; 1229 1230 /* update il and jl for prow */ 1231 if (jmin < jmax){ 1232 il[prow] = jmin; 1233 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1234 } 1235 prow = nextprow; 1236 } 1237 1238 /* if free space is not available, make more free space */ 1239 if (current_space->local_remaining<nzk) { 1240 i = am - k + 1; /* num of unfactored rows */ 1241 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1242 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1243 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1244 reallocs++; 1245 } 1246 1247 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1248 ierr = PetscLLClean_PermutedLeveled(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1249 1250 /* add the k-th row into il and jl */ 1251 if (nzk-1 > 0){ 1252 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1253 jl[k] = jl[i]; jl[i] = k; 1254 il[k] = ui[k] + 1; 1255 } 1256 uj_ptr[k] = current_space->array; 1257 uj_lvl_ptr[k] = current_space_lvl->array; 1258 1259 current_space->array += nzk; 1260 current_space->local_used += nzk; 1261 current_space->local_remaining -= nzk; 1262 1263 current_space_lvl->array += nzk; 1264 current_space_lvl->local_used += nzk; 1265 current_space_lvl->local_remaining -= nzk; 1266 1267 ui[k+1] = ui[k] + nzk; 1268 } 1269 1270 if (ai[am] != 0) { 1271 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1272 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1273 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1274 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1275 } else { 1276 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1277 } 1278 1279 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1280 ierr = PetscFree(jl);CHKERRQ(ierr); 1281 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1282 1283 /* destroy list of free space and other temporary array(s) */ 1284 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1285 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1286 ierr = PetscLLDestroy_PermutedLeveled(lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1287 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1288 1289 /* put together the new matrix in MATSEQSBAIJ format */ 1290 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1291 B = *fact; 1292 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1293 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1294 1295 b = (Mat_SeqSBAIJ*)B->data; 1296 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1297 b->singlemalloc = PETSC_FALSE; 1298 /* the next line frees the default space generated by the Create() */ 1299 ierr = PetscFree(b->a);CHKERRQ(ierr); 1300 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1301 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1302 b->j = uj; 1303 b->i = ui; 1304 b->diag = 0; 1305 b->ilen = 0; 1306 b->imax = 0; 1307 b->row = perm; 1308 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1309 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1310 b->icol = perm; 1311 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1312 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1313 PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 1314 b->maxnz = b->nz = ui[am]; 1315 1316 B->factor = FACTOR_CHOLESKY; 1317 B->info.factor_mallocs = reallocs; 1318 B->info.fill_ratio_given = fill; 1319 if (ai[am] != 0) { 1320 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1321 } else { 1322 B->info.fill_ratio_needed = 0.0; 1323 } 1324 /* Should the followings be moved to MatCholeskyFactorNumeric_SeqAIJ()? */ 1325 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1326 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1327 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1328 1329 /* -------- end of new ---------------------------*/ 1330 ierr = PetscObjectComposeFunction((PetscObject) B,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 1331 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1332 1333 PetscFunctionReturn(0); 1334 } 1335 1336 #undef __FUNCT__ 1337 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1338 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1339 { 1340 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1341 Mat_SeqSBAIJ *b; 1342 PetscErrorCode ierr; 1343 PetscTruth perm_identity; 1344 PetscReal fill = info->fill; 1345 PetscInt *rip,i,am=A->m,*ai,*aj,reallocs=0,prow; 1346 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1347 PetscInt nlnk,*lnk,ncols,*cols,*uj,**uj_ptr; 1348 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1349 PetscBT lnkbt; 1350 1351 PetscFunctionBegin; 1352 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1353 if (!perm_identity){ 1354 SETERRQ(PETSC_ERR_SUP,"Non-identity permutation is not supported yet"); 1355 } 1356 1357 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1358 ai = a->i; aj = a->j; 1359 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1360 1361 /* initialization */ 1362 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1363 ui[0] = 0; 1364 1365 /* jl: linked list for storing indices of the pivot rows 1366 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1367 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1368 il = jl + am; 1369 uj_ptr = (PetscInt**)(il + am); 1370 for (i=0; i<am; i++){ 1371 jl[i] = am; il[i] = 0; 1372 } 1373 1374 /* create and initialize a linked list for storing column indices of the active row k */ 1375 nlnk = am + 1; 1376 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1377 1378 /* initial FreeSpace size is fill*(ai[am]+1) */ 1379 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1380 current_space = free_space; 1381 1382 for (k=0; k<am; k++){ /* for each active row k */ 1383 /* initialize lnk by the column indices of row rip[k] of A */ 1384 nzk = 0; 1385 ncols = ai[rip[k]+1] - a->diag[rip[k]]; /* diag would change for !perm_identity */ 1386 cols = aj + a->diag[rip[k]]; 1387 ierr = PetscLLAdd(ncols,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1388 nzk += nlnk; 1389 1390 /* update lnk by computing fill-in for each pivot row to be merged in */ 1391 prow = jl[k]; /* 1st pivot row */ 1392 1393 while (prow < k){ 1394 nextprow = jl[prow]; 1395 1396 /* merge prow into k-th row */ 1397 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1398 jmax = ui[prow+1]; 1399 ncols = jmax-jmin; 1400 cols = uj_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1401 ierr = PetscLLAdd(ncols,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1402 nzk += nlnk; 1403 1404 /* update il and jl for prow */ 1405 if (jmin < jmax){ 1406 il[prow] = jmin; 1407 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1408 } 1409 prow = nextprow; 1410 } 1411 1412 /* if free space is not available, make more free space */ 1413 if (current_space->local_remaining<nzk) { 1414 i = am - k + 1; /* num of unfactored rows */ 1415 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1416 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1417 reallocs++; 1418 } 1419 1420 /* copy data into free space, then initialize lnk */ 1421 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1422 1423 /* add the k-th row into il and jl */ 1424 if (nzk-1 > 0){ 1425 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1426 jl[k] = jl[i]; jl[i] = k; 1427 il[k] = ui[k] + 1; 1428 } 1429 uj_ptr[k] = current_space->array; 1430 current_space->array += nzk; 1431 current_space->local_used += nzk; 1432 current_space->local_remaining -= nzk; 1433 1434 ui[k+1] = ui[k] + nzk; 1435 } 1436 1437 if (ai[am] != 0) { 1438 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1439 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1440 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1441 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1442 } else { 1443 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1444 } 1445 1446 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1447 ierr = PetscFree(jl);CHKERRQ(ierr); 1448 1449 /* destroy list of free space and other temporary array(s) */ 1450 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1451 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1452 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1453 1454 /* put together the new matrix in MATSEQSBAIJ format */ 1455 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1456 ierr = MatSetType(*fact,MATSEQSBAIJ);CHKERRQ(ierr); 1457 ierr = MatSeqSBAIJSetPreallocation(*fact,1,0,PETSC_NULL);CHKERRQ(ierr); 1458 1459 b = (Mat_SeqSBAIJ*)(*fact)->data; 1460 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1461 b->singlemalloc = PETSC_FALSE; 1462 /* the next line frees the default space generated by the Create() */ 1463 ierr = PetscFree(b->a);CHKERRQ(ierr); 1464 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1465 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1466 b->j = uj; 1467 b->i = ui; 1468 b->diag = 0; 1469 b->ilen = 0; 1470 b->imax = 0; 1471 b->row = perm; 1472 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1473 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1474 b->icol = perm; 1475 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1476 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1477 /* In b structure: Free imax, ilen, old a, old j. 1478 Allocate idnew, solve_work, new a, new j */ 1479 PetscLogObjectMemory(*fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 1480 b->maxnz = b->nz = ui[am]; 1481 1482 (*fact)->factor = FACTOR_CHOLESKY; 1483 (*fact)->info.factor_mallocs = reallocs; 1484 (*fact)->info.fill_ratio_given = fill; 1485 if (ai[am] != 0) { 1486 (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1487 } else { 1488 (*fact)->info.fill_ratio_needed = 0.0; 1489 } 1490 /* Should the followings be moved to MatCholeskyFactorNumeric_SeqAIJ()? */ 1491 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1492 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1493 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1494 1495 /* -------- end of new ---------------------------*/ 1496 ierr = PetscObjectComposeFunction((PetscObject) *fact,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);CHKERRQ(ierr); 1497 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1498 1499 PetscFunctionReturn(0); 1500 } 1501