1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 9 { 10 PetscFunctionBegin; 11 12 SETERRQ(PETSC_ERR_SUP,"Code not written"); 13 #if !defined(PETSC_USE_DEBUG) 14 PetscFunctionReturn(0); 15 #endif 16 } 17 18 19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 21 22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 28 /* ------------------------------------------------------------ 29 30 This interface was contribed by Tony Caola 31 32 This routine is an interface to the pivoting drop-tolerance 33 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 34 SPARSEKIT2. 35 36 The SPARSEKIT2 routines used here are covered by the GNU 37 copyright; see the file gnu in this directory. 38 39 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 40 help in getting this routine ironed out. 41 42 The major drawback to this routine is that if info->fill is 43 not large enough it fails rather than allocating more space; 44 this can be fixed by hacking/improving the f2c version of 45 Yousef Saad's code. 46 47 ------------------------------------------------------------ 48 */ 49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 50 { 51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 52 PetscFunctionBegin; 53 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 54 You can obtain the drop tolerance routines by installing PETSc from\n\ 55 www.mcs.anl.gov/petsc\n"); 56 #else 57 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 58 IS iscolf,isicol,isirow; 59 PetscTruth reorder; 60 PetscErrorCode ierr,sierr; 61 PetscInt *c,*r,*ic,i,n = A->m; 62 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63 PetscInt *ordcol,*iwk,*iperm,*jw; 64 PetscInt jmax,lfill,job,*o_i,*o_j; 65 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66 PetscReal af; 67 68 PetscFunctionBegin; 69 70 if (info->dt == PETSC_DEFAULT) info->dt = .005; 71 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 72 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 73 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 74 lfill = (PetscInt)(info->dtcount/2.0); 75 jmax = (PetscInt)(info->fill*a->nz); 76 77 78 /* ------------------------------------------------------------ 79 If reorder=.TRUE., then the original matrix has to be 80 reordered to reflect the user selected ordering scheme, and 81 then de-reordered so it is in it's original format. 82 Because Saad's dperm() is NOT in place, we have to copy 83 the original matrix and allocate more storage. . . 84 ------------------------------------------------------------ 85 */ 86 87 /* set reorder to true if either isrow or iscol is not identity */ 88 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 89 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 90 reorder = PetscNot(reorder); 91 92 93 /* storage for ilu factor */ 94 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 95 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 97 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 for (i=old_i[0];i<old_i[n];i++) { 104 old_j[i]++; 105 } 106 for(i=0;i<n+1;i++) { 107 old_i[i]++; 108 }; 109 110 111 if (reorder) { 112 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 113 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 114 for(i=0;i<n;i++) { 115 r[i] = r[i]+1; 116 c[i] = c[i]+1; 117 } 118 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 119 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 121 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 122 for (i=0;i<n;i++) { 123 r[i] = r[i]-1; 124 c[i] = c[i]-1; 125 } 126 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 127 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 128 o_a = old_a2; 129 o_j = old_j2; 130 o_i = old_i2; 131 } else { 132 o_a = old_a; 133 o_j = old_j; 134 o_i = old_i; 135 } 136 137 /* ------------------------------------------------------------ 138 Call Saad's ilutp() routine to generate the factorization 139 ------------------------------------------------------------ 140 */ 141 142 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 144 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 145 146 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 147 if (sierr) { 148 switch (sierr) { 149 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 150 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 152 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 153 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 154 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 155 } 156 } 157 158 ierr = PetscFree(w);CHKERRQ(ierr); 159 ierr = PetscFree(jw);CHKERRQ(ierr); 160 161 /* ------------------------------------------------------------ 162 Saad's routine gives the result in Modified Sparse Row (msr) 163 Convert to Compressed Sparse Row format (csr) 164 ------------------------------------------------------------ 165 */ 166 167 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 168 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 169 170 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 171 172 ierr = PetscFree(iwk);CHKERRQ(ierr); 173 ierr = PetscFree(wk);CHKERRQ(ierr); 174 175 if (reorder) { 176 ierr = PetscFree(old_a2);CHKERRQ(ierr); 177 ierr = PetscFree(old_j2);CHKERRQ(ierr); 178 ierr = PetscFree(old_i2);CHKERRQ(ierr); 179 } else { 180 /* fix permutation of old_j that the factorization introduced */ 181 for (i=old_i[0]; i<old_i[n]; i++) { 182 old_j[i-1] = iperm[old_j[i-1]-1]; 183 } 184 } 185 186 /* get rid of the shift to indices starting at 1 */ 187 for (i=0; i<n+1; i++) { 188 old_i[i]--; 189 } 190 for (i=old_i[0];i<old_i[n];i++) { 191 old_j[i]--; 192 } 193 194 /* Make the factored matrix 0-based */ 195 for (i=0; i<n+1; i++) { 196 new_i[i]--; 197 } 198 for (i=new_i[0];i<new_i[n];i++) { 199 new_j[i]--; 200 } 201 202 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 203 /*-- permute the right-hand-side and solution vectors --*/ 204 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 205 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 206 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 207 for(i=0; i<n; i++) { 208 ordcol[i] = ic[iperm[i]-1]; 209 }; 210 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 211 ierr = ISDestroy(isicol);CHKERRQ(ierr); 212 213 ierr = PetscFree(iperm);CHKERRQ(ierr); 214 215 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 216 ierr = PetscFree(ordcol);CHKERRQ(ierr); 217 218 /*----- put together the new matrix -----*/ 219 220 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 221 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 222 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 223 (*fact)->factor = FACTOR_LU; 224 (*fact)->assembled = PETSC_TRUE; 225 226 b = (Mat_SeqAIJ*)(*fact)->data; 227 ierr = PetscFree(b->imax);CHKERRQ(ierr); 228 b->sorted = PETSC_FALSE; 229 b->singlemalloc = PETSC_FALSE; 230 /* the next line frees the default space generated by the MatCreate() */ 231 ierr = PetscFree(b->a);CHKERRQ(ierr); 232 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 247 248 /* check out for identical nodes. If found, use inode functions */ 249 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 250 251 af = ((double)b->nz)/((double)a->nz) + .001; 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 /* 262 Factorization code for AIJ format. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 267 { 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 269 IS isicol; 270 PetscErrorCode ierr; 271 PetscInt *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j; 272 PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 273 PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im; 274 PetscReal f; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 284 ainew[0] = 0; 285 /* don't know how many column pointers are needed so estimate */ 286 f = info->fill; 287 jmax = (PetscInt)(f*ai[n]+1); 288 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 289 /* fill is a linked list of nonzeros in active row */ 290 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 291 im = fill + n + 1; 292 /* idnew is location of diagonal in factor */ 293 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr); 294 idnew[0] = 0; 295 296 for (i=0; i<n; i++) { 297 /* first copy previous fill into linked list */ 298 nnz = nz = ai[r[i]+1] - ai[r[i]]; 299 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 300 ajtmp = aj + ai[r[i]]; 301 fill[n] = n; 302 while (nz--) { 303 fm = n; 304 idx = ic[*ajtmp++]; 305 do { 306 m = fm; 307 fm = fill[m]; 308 } while (fm < idx); 309 fill[m] = idx; 310 fill[idx] = fm; 311 } 312 row = fill[n]; 313 while (row < i) { 314 ajtmp = ajnew + idnew[row] + 1; 315 nzbd = 1 + idnew[row] - ainew[row]; 316 nz = im[row] - nzbd; 317 fm = row; 318 while (nz-- > 0) { 319 idx = *ajtmp++ ; 320 nzbd++; 321 if (idx == i) im[row] = nzbd; 322 do { 323 m = fm; 324 fm = fill[m]; 325 } while (fm < idx); 326 if (fm != idx) { 327 fill[m] = idx; 328 fill[idx] = fm; 329 fm = idx; 330 nnz++; 331 } 332 } 333 row = fill[row]; 334 } 335 /* copy new filled row into permanent storage */ 336 ainew[i+1] = ainew[i] + nnz; 337 if (ainew[i+1] > jmax) { 338 339 /* estimate how much additional space we will need */ 340 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 341 /* just double the memory each time */ 342 PetscInt maxadd = jmax; 343 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 344 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 345 jmax += maxadd; 346 347 /* allocate a longer ajnew */ 348 ierr = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 349 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr); 350 ierr = PetscFree(ajnew);CHKERRQ(ierr); 351 ajnew = ajtmp; 352 reallocs++; /* count how many times we realloc */ 353 } 354 ajtmp = ajnew + ainew[i]; 355 fm = fill[n]; 356 nzi = 0; 357 im[i] = nnz; 358 while (nnz--) { 359 if (fm < i) nzi++; 360 *ajtmp++ = fm ; 361 fm = fill[fm]; 362 } 363 idnew[i] = ainew[i] + nzi; 364 } 365 if (ai[n] != 0) { 366 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 370 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 371 } else { 372 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 373 } 374 375 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 376 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 377 378 ierr = PetscFree(fill);CHKERRQ(ierr); 379 380 /* put together the new matrix */ 381 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 382 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 383 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 384 PetscLogObjectParent(*B,isicol); 385 b = (Mat_SeqAIJ*)(*B)->data; 386 ierr = PetscFree(b->imax);CHKERRQ(ierr); 387 b->singlemalloc = PETSC_FALSE; 388 /* the next line frees the default space generated by the Create() */ 389 ierr = PetscFree(b->a);CHKERRQ(ierr); 390 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 391 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 392 b->j = ajnew; 393 b->i = ainew; 394 b->diag = idnew; 395 b->ilen = 0; 396 b->imax = 0; 397 b->row = isrow; 398 b->col = iscol; 399 b->lu_damping = info->damping; 400 b->lu_zeropivot = info->zeropivot; 401 b->lu_shift = info->shift; 402 b->lu_shift_fraction = info->shift_fraction; 403 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 404 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 405 b->icol = isicol; 406 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 407 /* In b structure: Free imax, ilen, old a, old j. 408 Allocate idnew, solve_work, new a, new j */ 409 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar))); 410 b->maxnz = b->nz = ainew[n] ; 411 412 (*B)->factor = FACTOR_LU; 413 (*B)->info.factor_mallocs = reallocs; 414 (*B)->info.fill_ratio_given = f; 415 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 416 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 417 418 if (ai[n] != 0) { 419 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 420 } else { 421 (*B)->info.fill_ratio_needed = 0.0; 422 } 423 PetscFunctionReturn(0); 424 } 425 /* ----------------------------------------------------------- */ 426 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 430 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 431 { 432 Mat C = *B; 433 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 434 IS isrow = b->row,isicol = b->icol; 435 PetscErrorCode ierr; 436 PetscInt *r,*ic,i,j,n = A->m,*ai = b->i,*aj = b->j; 437 PetscInt *ajtmpold,*ajtmp,nz,row,*ics; 438 PetscInt *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0; 439 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 440 PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d; 441 PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.; 442 PetscTruth damp,lushift; 443 444 PetscFunctionBegin; 445 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 446 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 447 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 448 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 449 rtmps = rtmp; ics = ic; 450 451 if (!a->diag) { 452 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 453 } 454 455 if (b->lu_shift) { /* set max shift */ 456 PetscInt *aai = a->i,*ddiag = a->diag; 457 shift_top = 0; 458 for (i=0; i<n; i++) { 459 d = PetscAbsScalar((a->a)[ddiag[i]]); 460 /* calculate amt of shift needed for this row */ 461 if (d<=0) { 462 row_shift = 0; 463 } else { 464 row_shift = -2*d; 465 } 466 v = a->a+aai[i]; 467 for (j=0; j<aai[i+1]-aai[i]; j++) 468 row_shift += PetscAbsScalar(v[j]); 469 if (row_shift>shift_top) shift_top = row_shift; 470 } 471 } 472 473 shift_fraction = 0; shift_amount = 0; 474 do { 475 damp = PETSC_FALSE; 476 lushift = PETSC_FALSE; 477 for (i=0; i<n; i++) { 478 nz = ai[i+1] - ai[i]; 479 ajtmp = aj + ai[i]; 480 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 481 482 /* load in initial (unfactored row) */ 483 nz = a->i[r[i]+1] - a->i[r[i]]; 484 ajtmpold = a->j + a->i[r[i]]; 485 v = a->a + a->i[r[i]]; 486 for (j=0; j<nz; j++) { 487 rtmp[ics[ajtmpold[j]]] = v[j]; 488 } 489 rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */ 490 491 row = *ajtmp++ ; 492 while (row < i) { 493 pc = rtmp + row; 494 if (*pc != 0.0) { 495 pv = b->a + diag_offset[row]; 496 pj = b->j + diag_offset[row] + 1; 497 multiplier = *pc / *pv++; 498 *pc = multiplier; 499 nz = ai[row+1] - diag_offset[row] - 1; 500 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 501 PetscLogFlops(2*nz); 502 } 503 row = *ajtmp++; 504 } 505 /* finished row so stick it into b->a */ 506 pv = b->a + ai[i] ; 507 pj = b->j + ai[i] ; 508 nz = ai[i+1] - ai[i]; 509 diag = diag_offset[i] - ai[i]; 510 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 511 rs = 0.0; 512 for (j=0; j<nz; j++) { 513 pv[j] = rtmps[pj[j]]; 514 if (j != diag) rs += PetscAbsScalar(pv[j]); 515 } 516 #define MAX_NSHIFT 5 517 if (PetscRealPart(pv[diag]) <= zeropivot*rs && b->lu_shift) { 518 if (nshift>MAX_NSHIFT) { 519 SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner"); 520 } else if (nshift==MAX_NSHIFT) { 521 shift_fraction = shift_hi; 522 lushift = PETSC_FALSE; 523 } else { 524 shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 525 lushift = PETSC_TRUE; 526 } 527 shift_amount = shift_fraction * shift_top; 528 nshift++; 529 break; 530 } 531 if (PetscAbsScalar(pv[diag]) <= zeropivot*rs) { 532 if (damping) { 533 if (ndamp) damping *= 2.0; 534 damp = PETSC_TRUE; 535 ndamp++; 536 break; 537 } else { 538 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 539 } 540 } 541 } 542 if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) { 543 /* 544 * if no shift in this attempt & shifting & started shifting & can refine, 545 * then try lower shift 546 */ 547 shift_hi = shift_fraction; 548 shift_fraction = (shift_hi+shift_lo)/2.; 549 shift_amount = shift_fraction * shift_top; 550 lushift = PETSC_TRUE; 551 nshift++; 552 } 553 } while (damp || lushift); 554 555 /* invert diagonal entries for simplier triangular solves */ 556 for (i=0; i<n; i++) { 557 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 558 } 559 560 ierr = PetscFree(rtmp);CHKERRQ(ierr); 561 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 562 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 563 C->factor = FACTOR_LU; 564 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 565 C->assembled = PETSC_TRUE; 566 PetscLogFlops(C->n); 567 if (ndamp) { 568 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %D damping value %g\n",ndamp,damping); 569 } 570 if (nshift) { 571 b->lu_shift_fraction = shift_fraction; 572 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift); 573 } 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 579 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 580 { 581 PetscFunctionBegin; 582 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 583 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 584 PetscFunctionReturn(0); 585 } 586 587 588 /* ----------------------------------------------------------- */ 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatLUFactor_SeqAIJ" 591 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 592 { 593 PetscErrorCode ierr; 594 Mat C; 595 596 PetscFunctionBegin; 597 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 598 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 599 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 600 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 601 PetscFunctionReturn(0); 602 } 603 /* ----------------------------------------------------------- */ 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatSolve_SeqAIJ" 606 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 607 { 608 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 609 IS iscol = a->col,isrow = a->row; 610 PetscErrorCode ierr; 611 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 612 PetscInt nz,*rout,*cout; 613 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 614 615 PetscFunctionBegin; 616 if (!n) PetscFunctionReturn(0); 617 618 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 619 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 620 tmp = a->solve_work; 621 622 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 623 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 624 625 /* forward solve the lower triangular */ 626 tmp[0] = b[*r++]; 627 tmps = tmp; 628 for (i=1; i<n; i++) { 629 v = aa + ai[i] ; 630 vi = aj + ai[i] ; 631 nz = a->diag[i] - ai[i]; 632 sum = b[*r++]; 633 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 634 tmp[i] = sum; 635 } 636 637 /* backward solve the upper triangular */ 638 for (i=n-1; i>=0; i--){ 639 v = aa + a->diag[i] + 1; 640 vi = aj + a->diag[i] + 1; 641 nz = ai[i+1] - a->diag[i] - 1; 642 sum = tmp[i]; 643 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 644 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 645 } 646 647 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 648 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 649 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 650 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 651 PetscLogFlops(2*a->nz - A->n); 652 PetscFunctionReturn(0); 653 } 654 655 /* ----------------------------------------------------------- */ 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 658 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 659 { 660 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 661 PetscErrorCode ierr; 662 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 663 PetscScalar *x,*b,*aa = a->a; 664 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 665 PetscInt adiag_i,i,*vi,nz,ai_i; 666 PetscScalar *v,sum; 667 #endif 668 669 PetscFunctionBegin; 670 if (!n) PetscFunctionReturn(0); 671 672 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 673 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 674 675 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 676 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 677 #else 678 /* forward solve the lower triangular */ 679 x[0] = b[0]; 680 for (i=1; i<n; i++) { 681 ai_i = ai[i]; 682 v = aa + ai_i; 683 vi = aj + ai_i; 684 nz = adiag[i] - ai_i; 685 sum = b[i]; 686 while (nz--) sum -= *v++ * x[*vi++]; 687 x[i] = sum; 688 } 689 690 /* backward solve the upper triangular */ 691 for (i=n-1; i>=0; i--){ 692 adiag_i = adiag[i]; 693 v = aa + adiag_i + 1; 694 vi = aj + adiag_i + 1; 695 nz = ai[i+1] - adiag_i - 1; 696 sum = x[i]; 697 while (nz--) sum -= *v++ * x[*vi++]; 698 x[i] = sum*aa[adiag_i]; 699 } 700 #endif 701 PetscLogFlops(2*a->nz - A->n); 702 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 703 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 709 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 710 { 711 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 712 IS iscol = a->col,isrow = a->row; 713 PetscErrorCode ierr; 714 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 715 PetscInt nz,*rout,*cout; 716 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 717 718 PetscFunctionBegin; 719 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 720 721 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 722 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 723 tmp = a->solve_work; 724 725 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 726 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 727 728 /* forward solve the lower triangular */ 729 tmp[0] = b[*r++]; 730 for (i=1; i<n; i++) { 731 v = aa + ai[i] ; 732 vi = aj + ai[i] ; 733 nz = a->diag[i] - ai[i]; 734 sum = b[*r++]; 735 while (nz--) sum -= *v++ * tmp[*vi++ ]; 736 tmp[i] = sum; 737 } 738 739 /* backward solve the upper triangular */ 740 for (i=n-1; i>=0; i--){ 741 v = aa + a->diag[i] + 1; 742 vi = aj + a->diag[i] + 1; 743 nz = ai[i+1] - a->diag[i] - 1; 744 sum = tmp[i]; 745 while (nz--) sum -= *v++ * tmp[*vi++ ]; 746 tmp[i] = sum*aa[a->diag[i]]; 747 x[*c--] += tmp[i]; 748 } 749 750 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 751 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 752 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 753 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 754 PetscLogFlops(2*a->nz); 755 756 PetscFunctionReturn(0); 757 } 758 /* -------------------------------------------------------------------*/ 759 #undef __FUNCT__ 760 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 761 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 762 { 763 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 764 IS iscol = a->col,isrow = a->row; 765 PetscErrorCode ierr; 766 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 767 PetscInt nz,*rout,*cout,*diag = a->diag; 768 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 769 770 PetscFunctionBegin; 771 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 772 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 773 tmp = a->solve_work; 774 775 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 776 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 777 778 /* copy the b into temp work space according to permutation */ 779 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 780 781 /* forward solve the U^T */ 782 for (i=0; i<n; i++) { 783 v = aa + diag[i] ; 784 vi = aj + diag[i] + 1; 785 nz = ai[i+1] - diag[i] - 1; 786 s1 = tmp[i]; 787 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 788 while (nz--) { 789 tmp[*vi++ ] -= (*v++)*s1; 790 } 791 tmp[i] = s1; 792 } 793 794 /* backward solve the L^T */ 795 for (i=n-1; i>=0; i--){ 796 v = aa + diag[i] - 1 ; 797 vi = aj + diag[i] - 1 ; 798 nz = diag[i] - ai[i]; 799 s1 = tmp[i]; 800 while (nz--) { 801 tmp[*vi-- ] -= (*v--)*s1; 802 } 803 } 804 805 /* copy tmp into x according to permutation */ 806 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 807 808 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 809 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 810 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 811 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 812 813 PetscLogFlops(2*a->nz-A->n); 814 PetscFunctionReturn(0); 815 } 816 817 #undef __FUNCT__ 818 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 819 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 820 { 821 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 822 IS iscol = a->col,isrow = a->row; 823 PetscErrorCode ierr; 824 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 825 PetscInt nz,*rout,*cout,*diag = a->diag; 826 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 827 828 PetscFunctionBegin; 829 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 830 831 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 832 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 833 tmp = a->solve_work; 834 835 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 836 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 837 838 /* copy the b into temp work space according to permutation */ 839 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 840 841 /* forward solve the U^T */ 842 for (i=0; i<n; i++) { 843 v = aa + diag[i] ; 844 vi = aj + diag[i] + 1; 845 nz = ai[i+1] - diag[i] - 1; 846 tmp[i] *= *v++; 847 while (nz--) { 848 tmp[*vi++ ] -= (*v++)*tmp[i]; 849 } 850 } 851 852 /* backward solve the L^T */ 853 for (i=n-1; i>=0; i--){ 854 v = aa + diag[i] - 1 ; 855 vi = aj + diag[i] - 1 ; 856 nz = diag[i] - ai[i]; 857 while (nz--) { 858 tmp[*vi-- ] -= (*v--)*tmp[i]; 859 } 860 } 861 862 /* copy tmp into x according to permutation */ 863 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 864 865 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 866 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 867 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 868 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 869 870 PetscLogFlops(2*a->nz); 871 PetscFunctionReturn(0); 872 } 873 /* ----------------------------------------------------------------*/ 874 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 875 876 #undef __FUNCT__ 877 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 878 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 879 { 880 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 881 IS isicol; 882 PetscErrorCode ierr; 883 PetscInt *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j; 884 PetscInt *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 885 PetscInt *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0; 886 PetscInt incrlev,nnz,i,levels,diagonal_fill; 887 PetscTruth col_identity,row_identity; 888 PetscReal f; 889 890 PetscFunctionBegin; 891 f = info->fill; 892 levels = (PetscInt)info->levels; 893 diagonal_fill = (PetscInt)info->diagonal_fill; 894 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 895 896 /* special case that simply copies fill pattern */ 897 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 898 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 899 if (!levels && row_identity && col_identity) { 900 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 901 (*fact)->factor = FACTOR_LU; 902 b = (Mat_SeqAIJ*)(*fact)->data; 903 if (!b->diag) { 904 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 905 } 906 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 907 b->row = isrow; 908 b->col = iscol; 909 b->icol = isicol; 910 b->lu_damping = info->damping; 911 b->lu_zeropivot = info->zeropivot; 912 b->lu_shift = info->shift; 913 b->lu_shift_fraction= info->shift_fraction; 914 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 915 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 916 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 917 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 918 PetscFunctionReturn(0); 919 } 920 921 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 922 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 923 924 /* get new row pointers */ 925 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 926 ainew[0] = 0; 927 /* don't know how many column pointers are needed so estimate */ 928 jmax = (PetscInt)(f*(ai[n]+1)); 929 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 930 /* ajfill is level of fill for each fill entry */ 931 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr); 932 /* fill is a linked list of nonzeros in active row */ 933 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 934 /* im is level for each filled value */ 935 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 936 /* dloc is location of diagonal in factor */ 937 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr); 938 dloc[0] = 0; 939 for (prow=0; prow<n; prow++) { 940 941 /* copy current row into linked list */ 942 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 943 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 944 xi = aj + ai[r[prow]] ; 945 fill[n] = n; 946 fill[prow] = -1; /* marker to indicate if diagonal exists */ 947 while (nz--) { 948 fm = n; 949 idx = ic[*xi++ ]; 950 do { 951 m = fm; 952 fm = fill[m]; 953 } while (fm < idx); 954 fill[m] = idx; 955 fill[idx] = fm; 956 im[idx] = 0; 957 } 958 959 /* make sure diagonal entry is included */ 960 if (diagonal_fill && fill[prow] == -1) { 961 fm = n; 962 while (fill[fm] < prow) fm = fill[fm]; 963 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 964 fill[fm] = prow; 965 im[prow] = 0; 966 nzf++; 967 dcount++; 968 } 969 970 nzi = 0; 971 row = fill[n]; 972 while (row < prow) { 973 incrlev = im[row] + 1; 974 nz = dloc[row]; 975 xi = ajnew + ainew[row] + nz + 1; 976 flev = ajfill + ainew[row] + nz + 1; 977 nnz = ainew[row+1] - ainew[row] - nz - 1; 978 fm = row; 979 while (nnz-- > 0) { 980 idx = *xi++ ; 981 if (*flev + incrlev > levels) { 982 flev++; 983 continue; 984 } 985 do { 986 m = fm; 987 fm = fill[m]; 988 } while (fm < idx); 989 if (fm != idx) { 990 im[idx] = *flev + incrlev; 991 fill[m] = idx; 992 fill[idx] = fm; 993 fm = idx; 994 nzf++; 995 } else { 996 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 997 } 998 flev++; 999 } 1000 row = fill[row]; 1001 nzi++; 1002 } 1003 /* copy new filled row into permanent storage */ 1004 ainew[prow+1] = ainew[prow] + nzf; 1005 if (ainew[prow+1] > jmax) { 1006 1007 /* estimate how much additional space we will need */ 1008 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1009 /* just double the memory each time */ 1010 /* maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1011 PetscInt maxadd = jmax; 1012 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1013 jmax += maxadd; 1014 1015 /* allocate a longer ajnew and ajfill */ 1016 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1017 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1018 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1019 ajnew = xi; 1020 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1021 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1022 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1023 ajfill = xi; 1024 reallocs++; /* count how many times we realloc */ 1025 } 1026 xi = ajnew + ainew[prow] ; 1027 flev = ajfill + ainew[prow] ; 1028 dloc[prow] = nzi; 1029 fm = fill[n]; 1030 while (nzf--) { 1031 *xi++ = fm ; 1032 *flev++ = im[fm]; 1033 fm = fill[fm]; 1034 } 1035 /* make sure row has diagonal entry */ 1036 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1037 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1038 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1039 } 1040 } 1041 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1042 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1043 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1044 ierr = PetscFree(fill);CHKERRQ(ierr); 1045 ierr = PetscFree(im);CHKERRQ(ierr); 1046 1047 { 1048 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1049 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1050 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1051 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1052 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1053 if (diagonal_fill) { 1054 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1055 } 1056 } 1057 1058 /* put together the new matrix */ 1059 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1060 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1061 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1062 PetscLogObjectParent(*fact,isicol); 1063 b = (Mat_SeqAIJ*)(*fact)->data; 1064 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1065 b->singlemalloc = PETSC_FALSE; 1066 len = (ainew[n] )*sizeof(PetscScalar); 1067 /* the next line frees the default space generated by the Create() */ 1068 ierr = PetscFree(b->a);CHKERRQ(ierr); 1069 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1070 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1071 b->j = ajnew; 1072 b->i = ainew; 1073 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1074 b->diag = dloc; 1075 b->ilen = 0; 1076 b->imax = 0; 1077 b->row = isrow; 1078 b->col = iscol; 1079 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1080 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1081 b->icol = isicol; 1082 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1083 /* In b structure: Free imax, ilen, old a, old j. 1084 Allocate dloc, solve_work, new a, new j */ 1085 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar))); 1086 b->maxnz = b->nz = ainew[n] ; 1087 b->lu_damping = info->damping; 1088 b->lu_shift = info->shift; 1089 b->lu_shift_fraction = info->shift_fraction; 1090 b->lu_zeropivot = info->zeropivot; 1091 (*fact)->factor = FACTOR_LU; 1092 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1093 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1094 1095 (*fact)->info.factor_mallocs = reallocs; 1096 (*fact)->info.fill_ratio_given = f; 1097 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #include "src/mat/impls/sbaij/seq/sbaij.h" 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1104 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *B) 1105 { 1106 Mat C = *B; 1107 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1108 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1109 IS ip=b->row; 1110 PetscErrorCode ierr; 1111 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1112 PetscInt *ai=a->i,*aj=a->j; 1113 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1114 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1115 PetscReal damping=b->factor_damping,zeropivot=b->factor_zeropivot,shift_amount; 1116 PetscTruth damp,chshift; 1117 PetscInt nshift=0,ndamp=0; 1118 1119 PetscFunctionBegin; 1120 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1121 1122 /* initialization */ 1123 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1124 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1125 jl = il + mbs; 1126 rtmp = (MatScalar*)(jl + mbs); 1127 1128 shift_amount = 0; 1129 do { 1130 damp = PETSC_FALSE; 1131 chshift = PETSC_FALSE; 1132 for (i=0; i<mbs; i++) { 1133 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1134 } 1135 1136 for (k = 0; k<mbs; k++){ 1137 bval = ba + bi[k]; 1138 /* initialize k-th row by the perm[k]-th row of A */ 1139 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1140 for (j = jmin; j < jmax; j++){ 1141 col = rip[aj[j]]; 1142 if (col >= k){ /* only take upper triangular entry */ 1143 rtmp[col] = aa[j]; 1144 *bval++ = 0.0; /* for in-place factorization */ 1145 } 1146 } 1147 /* damp the diagonal of the matrix */ 1148 if (ndamp||nshift) rtmp[k] += damping+shift_amount; 1149 1150 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1151 dk = rtmp[k]; 1152 i = jl[k]; /* first row to be added to k_th row */ 1153 1154 while (i < k){ 1155 nexti = jl[i]; /* next row to be added to k_th row */ 1156 1157 /* compute multiplier, update diag(k) and U(i,k) */ 1158 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1159 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1160 dk += uikdi*ba[ili]; 1161 ba[ili] = uikdi; /* -U(i,k) */ 1162 1163 /* add multiple of row i to k-th row */ 1164 jmin = ili + 1; jmax = bi[i+1]; 1165 if (jmin < jmax){ 1166 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1167 /* update il and jl for row i */ 1168 il[i] = jmin; 1169 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1170 } 1171 i = nexti; 1172 } 1173 1174 if (PetscRealPart(dk) < zeropivot && b->factor_shift){ 1175 /* calculate a shift that would make this row diagonally dominant */ 1176 PetscReal rs = PetscAbs(PetscRealPart(dk)); 1177 jmin = bi[k]+1; 1178 nz = bi[k+1] - jmin; 1179 if (nz){ 1180 bcol = bj + jmin; 1181 bval = ba + jmin; 1182 while (nz--){ 1183 rs += PetscAbsScalar(rtmp[*bcol++]); 1184 } 1185 } 1186 /* if this shift is less than the previous, just up the previous 1187 one by a bit */ 1188 shift_amount = PetscMax(rs,1.1*shift_amount); 1189 chshift = PETSC_TRUE; 1190 /* Unlike in the ILU case there is no exit condition on nshift: 1191 we increase the shift until it converges. There is no guarantee that 1192 this algorithm converges faster or slower, or is better or worse 1193 than the ILU algorithm. */ 1194 nshift++; 1195 break; 1196 } 1197 if (PetscRealPart(dk) < zeropivot){ 1198 if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1); 1199 if (damping > 0.0) { 1200 if (ndamp) damping *= 2.0; 1201 damp = PETSC_TRUE; 1202 ndamp++; 1203 break; 1204 } else if (PetscAbsScalar(dk) < zeropivot){ 1205 SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot); 1206 } else { 1207 PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k); 1208 } 1209 } 1210 1211 /* copy data into U(k,:) */ 1212 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1213 jmin = bi[k]+1; jmax = bi[k+1]; 1214 if (jmin < jmax) { 1215 for (j=jmin; j<jmax; j++){ 1216 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1217 } 1218 /* add the k-th row into il and jl */ 1219 il[k] = jmin; 1220 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1221 } 1222 } 1223 } while (damp||chshift); 1224 ierr = PetscFree(il);CHKERRQ(ierr); 1225 1226 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1227 C->factor = FACTOR_CHOLESKY; 1228 C->assembled = PETSC_TRUE; 1229 C->preallocated = PETSC_TRUE; 1230 PetscLogFlops(C->m); 1231 if (ndamp) { 1232 PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqAIJ: number of damping tries %D damping value %g\n",ndamp,damping); 1233 } 1234 if (nshift) { 1235 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ diagonal shifted %D shifts\n",nshift); 1236 } 1237 PetscFunctionReturn(0); 1238 } 1239 #undef __FUNCT__ 1240 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering" 1241 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering(Mat A,Mat *fact) 1242 { 1243 Mat C = *fact; 1244 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1245 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1246 PetscErrorCode ierr; 1247 PetscInt i,j,am = A->m; 1248 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 1249 PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0; 1250 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 1251 PetscReal damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount; 1252 PetscTruth damp,chshift; 1253 PetscInt nshift=0; 1254 1255 PetscFunctionBegin; 1256 /* initialization */ 1257 nz = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar); 1258 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1259 jl = il + am; 1260 rtmp = (MatScalar*)(jl + am); 1261 1262 shift_amount = 0; 1263 do { 1264 damp = PETSC_FALSE; 1265 chshift = PETSC_FALSE; 1266 for (i=0; i<am; i++) { 1267 rtmp[i] = 0.0; jl[i] = am; il[0] = 0; 1268 } 1269 1270 for (k = 0; k<am; k++){ 1271 /* initialize k-th row with elements nonzero in row perm(k) of A */ 1272 nz = ai[k+1] - ai[k]; 1273 acol = aj + ai[k]; 1274 aval = aa + ai[k]; 1275 bval = ba + bi[k]; 1276 while (nz -- ){ 1277 if (*acol < k) { /* skip lower triangular entries */ 1278 acol++; aval++; 1279 } else { 1280 rtmp[*acol++] = *aval++; 1281 *bval++ = 0.0; /* for in-place factorization */ 1282 } 1283 } 1284 /* damp the diagonal of the matrix */ 1285 if (ndamp||nshift) rtmp[k] += damping+shift_amount; 1286 1287 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1288 dk = rtmp[k]; 1289 i = jl[k]; /* first row to be added to k_th row */ 1290 1291 while (i < k){ 1292 nexti = jl[i]; /* next row to be added to k_th row */ 1293 /* compute multiplier, update D(k) and U(i,k) */ 1294 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1295 uikdi = - ba[ili]*ba[bi[i]]; 1296 dk += uikdi*ba[ili]; 1297 ba[ili] = uikdi; /* -U(i,k) */ 1298 1299 /* add multiple of row i to k-th row ... */ 1300 jmin = ili + 1; 1301 nz = bi[i+1] - jmin; 1302 if (nz > 0){ 1303 bcol = bj + jmin; 1304 bval = ba + jmin; 1305 while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 1306 /* update il and jl for i-th row */ 1307 il[i] = jmin; 1308 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1309 } 1310 i = nexti; 1311 } 1312 1313 if (PetscRealPart(dk) < zeropivot && b->factor_shift){ 1314 /* calculate a shift that would make this row diagonally dominant */ 1315 PetscReal rs = PetscAbs(PetscRealPart(dk)); 1316 jmin = bi[k]+1; 1317 nz = bi[k+1] - jmin; 1318 if (nz){ 1319 bcol = bj + jmin; 1320 bval = ba + jmin; 1321 while (nz--){ 1322 rs += PetscAbsScalar(rtmp[*bcol++]); 1323 } 1324 } 1325 /* if this shift is less than the previous, just up the previous 1326 one by a bit */ 1327 shift_amount = PetscMax(rs,1.1*shift_amount); 1328 chshift = PETSC_TRUE; 1329 /* Unlike in the ILU case there is no exit condition on nshift: 1330 we increase the shift until it converges. There is no guarantee that 1331 this algorithm converges faster or slower, or is better or worse 1332 than the ILU algorithm. */ 1333 nshift++; 1334 break; 1335 } 1336 if (PetscRealPart(dk) < zeropivot){ 1337 if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1); 1338 if (damping > 0.0) { 1339 if (ndamp) damping *= 2.0; 1340 damp = PETSC_TRUE; 1341 ndamp++; 1342 break; 1343 } else if (PetscAbsScalar(dk) < zeropivot){ 1344 SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot); 1345 } else { 1346 PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k); 1347 } 1348 } 1349 1350 /* copy data into U(k,:) */ 1351 ba[bi[k]] = 1.0/dk; 1352 jmin = bi[k]+1; 1353 nz = bi[k+1] - jmin; 1354 if (nz){ 1355 bcol = bj + jmin; 1356 bval = ba + jmin; 1357 while (nz--){ 1358 *bval++ = rtmp[*bcol]; 1359 rtmp[*bcol++] = 0.0; 1360 } 1361 /* add k-th row into il and jl */ 1362 il[k] = jmin; 1363 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1364 } 1365 } 1366 } while (damp||chshift); 1367 ierr = PetscFree(il);CHKERRQ(ierr); 1368 1369 C->factor = FACTOR_CHOLESKY; 1370 C->assembled = PETSC_TRUE; 1371 C->preallocated = PETSC_TRUE; 1372 PetscLogFlops(C->m); 1373 if (ndamp) { 1374 PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqAIJ_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping); 1375 } 1376 if (nshift) { 1377 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering diagonal shifted %D shifts\n",nshift); 1378 } 1379 PetscFunctionReturn(0); 1380 } 1381 1382 #include "petscbt.h" 1383 #include "src/mat/utils/freespace.h" 1384 #undef __FUNCT__ 1385 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1386 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1387 { 1388 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1389 Mat_SeqSBAIJ *b; 1390 Mat B; 1391 PetscErrorCode ierr; 1392 PetscTruth perm_identity; 1393 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1394 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1395 PetscInt nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1396 PetscReal fill=info->fill,levels=info->levels; 1397 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1398 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1399 PetscBT lnkbt; 1400 1401 PetscFunctionBegin; 1402 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1403 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1404 1405 /* special case that simply copies fill pattern */ 1406 if (!levels && perm_identity) { 1407 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1408 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1409 for (i=0; i<am; i++) { 1410 ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */ 1411 } 1412 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1413 B = *fact; 1414 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1415 ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr); 1416 1417 b = (Mat_SeqSBAIJ*)B->data; 1418 uj = b->j; 1419 for (i=0; i<am; i++) { 1420 aj = a->j + a->diag[i]; 1421 for (j=0; j<ui[i]; j++){ 1422 *uj++ = *aj++; 1423 } 1424 b->ilen[i] = ui[i]; 1425 } 1426 ierr = PetscFree(ui);CHKERRQ(ierr); 1427 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1428 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1429 1430 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1431 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1432 ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 1433 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering; 1434 PetscFunctionReturn(0); 1435 } 1436 1437 /* initialization */ 1438 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1439 ui[0] = 0; 1440 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 1441 1442 /* jl: linked list for storing indices of the pivot rows 1443 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1444 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1445 il = jl + am; 1446 uj_ptr = (PetscInt**)(il + am); 1447 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1448 for (i=0; i<am; i++){ 1449 jl[i] = am; il[i] = 0; 1450 } 1451 1452 /* create and initialize a linked list for storing column indices of the active row k */ 1453 nlnk = am + 1; 1454 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1455 1456 /* initial FreeSpace size is fill*(ai[am]+1) */ 1457 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1458 current_space = free_space; 1459 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1460 current_space_lvl = free_space_lvl; 1461 1462 for (k=0; k<am; k++){ /* for each active row k */ 1463 /* initialize lnk by the column indices of row rip[k] of A */ 1464 nzk = 0; 1465 ncols = ai[rip[k]+1] - ai[rip[k]]; 1466 ncols_upper = 0; 1467 cols = cols_lvl + am; 1468 for (j=0; j<ncols; j++){ 1469 i = rip[*(aj + ai[rip[k]] + j)]; 1470 if (i >= k){ /* only take upper triangular entry */ 1471 cols[ncols_upper] = i; 1472 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 1473 ncols_upper++; 1474 } 1475 } 1476 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1477 nzk += nlnk; 1478 1479 /* update lnk by computing fill-in for each pivot row to be merged in */ 1480 prow = jl[k]; /* 1st pivot row */ 1481 1482 while (prow < k){ 1483 nextprow = jl[prow]; 1484 1485 /* merge prow into k-th row */ 1486 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1487 jmax = ui[prow+1]; 1488 ncols = jmax-jmin; 1489 i = jmin - ui[prow]; 1490 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1491 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1492 ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1493 nzk += nlnk; 1494 1495 /* update il and jl for prow */ 1496 if (jmin < jmax){ 1497 il[prow] = jmin; 1498 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1499 } 1500 prow = nextprow; 1501 } 1502 1503 /* if free space is not available, make more free space */ 1504 if (current_space->local_remaining<nzk) { 1505 i = am - k + 1; /* num of unfactored rows */ 1506 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1507 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1508 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1509 reallocs++; 1510 } 1511 1512 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1513 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1514 1515 /* add the k-th row into il and jl */ 1516 if (nzk-1 > 0){ 1517 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1518 jl[k] = jl[i]; jl[i] = k; 1519 il[k] = ui[k] + 1; 1520 } 1521 uj_ptr[k] = current_space->array; 1522 uj_lvl_ptr[k] = current_space_lvl->array; 1523 1524 current_space->array += nzk; 1525 current_space->local_used += nzk; 1526 current_space->local_remaining -= nzk; 1527 1528 current_space_lvl->array += nzk; 1529 current_space_lvl->local_used += nzk; 1530 current_space_lvl->local_remaining -= nzk; 1531 1532 ui[k+1] = ui[k] + nzk; 1533 } 1534 1535 if (ai[am] != 0) { 1536 PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]); 1537 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1538 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1539 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1540 } else { 1541 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1542 } 1543 1544 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1545 ierr = PetscFree(jl);CHKERRQ(ierr); 1546 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1547 1548 /* destroy list of free space and other temporary array(s) */ 1549 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1550 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1551 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1552 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1553 1554 /* put together the new matrix in MATSEQSBAIJ format */ 1555 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1556 B = *fact; 1557 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1558 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1559 1560 b = (Mat_SeqSBAIJ*)B->data; 1561 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1562 b->singlemalloc = PETSC_FALSE; 1563 /* the next line frees the default space generated by the Create() */ 1564 ierr = PetscFree(b->a);CHKERRQ(ierr); 1565 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1566 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1567 b->j = uj; 1568 b->i = ui; 1569 b->diag = 0; 1570 b->ilen = 0; 1571 b->imax = 0; 1572 b->row = perm; 1573 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1574 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1575 b->icol = perm; 1576 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1577 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1578 PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 1579 b->maxnz = b->nz = ui[am]; 1580 1581 B->factor = FACTOR_CHOLESKY; 1582 B->info.factor_mallocs = reallocs; 1583 B->info.fill_ratio_given = fill; 1584 if (ai[am] != 0) { 1585 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1586 } else { 1587 B->info.fill_ratio_needed = 0.0; 1588 } 1589 if (perm_identity){ 1590 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1591 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1592 ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 1593 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering; 1594 } else { 1595 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1596 } 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1602 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1603 { 1604 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1605 Mat_SeqSBAIJ *b; 1606 Mat B; 1607 PetscErrorCode ierr; 1608 PetscTruth perm_identity; 1609 PetscReal fill = info->fill; 1610 PetscInt *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1611 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1612 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1613 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1614 PetscBT lnkbt; 1615 IS iperm; 1616 1617 PetscFunctionBegin; 1618 /* check whether perm is the identity mapping */ 1619 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1620 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1621 1622 if (!perm_identity){ 1623 /* check if perm is symmetric! */ 1624 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1625 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1626 for (i=0; i<mbs; i++) { 1627 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1628 } 1629 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1630 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1631 } 1632 1633 /* initialization */ 1634 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1635 ui[0] = 0; 1636 1637 /* jl: linked list for storing indices of the pivot rows 1638 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 1639 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1640 il = jl + mbs; 1641 cols = il + mbs; 1642 ui_ptr = (PetscInt**)(cols + mbs); 1643 for (i=0; i<mbs; i++){ 1644 jl[i] = mbs; il[i] = 0; 1645 } 1646 1647 /* create and initialize a linked list for storing column indices of the active row k */ 1648 nlnk = mbs + 1; 1649 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1650 1651 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 1652 ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 1653 current_space = free_space; 1654 1655 for (k=0; k<mbs; k++){ /* for each active row k */ 1656 /* initialize lnk by the column indices of row rip[k] of A */ 1657 nzk = 0; 1658 ncols = ai[rip[k]+1] - ai[rip[k]]; 1659 ncols_upper = 0; 1660 for (j=0; j<ncols; j++){ 1661 i = rip[*(aj + ai[rip[k]] + j)]; 1662 if (i >= k){ /* only take upper triangular entry */ 1663 cols[ncols_upper] = i; 1664 ncols_upper++; 1665 } 1666 } 1667 ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1668 nzk += nlnk; 1669 1670 /* update lnk by computing fill-in for each pivot row to be merged in */ 1671 prow = jl[k]; /* 1st pivot row */ 1672 1673 while (prow < k){ 1674 nextprow = jl[prow]; 1675 /* merge prow into k-th row */ 1676 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 1677 jmax = ui[prow+1]; 1678 ncols = jmax-jmin; 1679 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 1680 ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1681 nzk += nlnk; 1682 1683 /* update il and jl for prow */ 1684 if (jmin < jmax){ 1685 il[prow] = jmin; 1686 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1687 } 1688 prow = nextprow; 1689 } 1690 1691 /* if free space is not available, make more free space */ 1692 if (current_space->local_remaining<nzk) { 1693 i = mbs - k + 1; /* num of unfactored rows */ 1694 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1695 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1696 reallocs++; 1697 } 1698 1699 /* copy data into free space, then initialize lnk */ 1700 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1701 1702 /* add the k-th row into il and jl */ 1703 if (nzk-1 > 0){ 1704 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1705 jl[k] = jl[i]; jl[i] = k; 1706 il[k] = ui[k] + 1; 1707 } 1708 ui_ptr[k] = current_space->array; 1709 current_space->array += nzk; 1710 current_space->local_used += nzk; 1711 current_space->local_remaining -= nzk; 1712 1713 ui[k+1] = ui[k] + nzk; 1714 } 1715 1716 if (ai[mbs] != 0) { 1717 PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]); 1718 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1719 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1720 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1721 } else { 1722 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1723 } 1724 1725 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1726 ierr = PetscFree(jl);CHKERRQ(ierr); 1727 1728 /* destroy list of free space and other temporary array(s) */ 1729 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1730 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1731 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1732 1733 /* put together the new matrix in MATSEQSBAIJ format */ 1734 ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr); 1735 B = *fact; 1736 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1737 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1738 1739 b = (Mat_SeqSBAIJ*)B->data; 1740 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1741 b->singlemalloc = PETSC_FALSE; 1742 /* the next line frees the default space generated by the Create() */ 1743 ierr = PetscFree(b->a);CHKERRQ(ierr); 1744 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1745 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1746 b->j = uj; 1747 b->i = ui; 1748 b->diag = 0; 1749 b->ilen = 0; 1750 b->imax = 0; 1751 b->row = perm; 1752 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1753 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1754 b->icol = perm; 1755 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1756 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1757 PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 1758 b->maxnz = b->nz = ui[mbs]; 1759 1760 B->factor = FACTOR_CHOLESKY; 1761 B->info.factor_mallocs = reallocs; 1762 B->info.fill_ratio_given = fill; 1763 if (ai[mbs] != 0) { 1764 B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 1765 } else { 1766 B->info.fill_ratio_needed = 0.0; 1767 } 1768 if (perm_identity){ 1769 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1770 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1771 ierr = PetscObjectComposeFunction((PetscObject)(*fact),"MatCholeskyFactorNumeric_NaturalOrdering","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);CHKERRQ(ierr); 1772 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering; 1773 } else { 1774 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1775 } 1776 PetscFunctionReturn(0); 1777 } 1778