1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 9 { 10 PetscFunctionBegin; 11 12 SETERRQ(PETSC_ERR_SUP,"Code not written"); 13 #if !defined(PETSC_USE_DEBUG) 14 PetscFunctionReturn(0); 15 #endif 16 } 17 18 19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 21 22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 28 /* ------------------------------------------------------------ 29 30 This interface was contribed by Tony Caola 31 32 This routine is an interface to the pivoting drop-tolerance 33 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 34 SPARSEKIT2. 35 36 The SPARSEKIT2 routines used here are covered by the GNU 37 copyright; see the file gnu in this directory. 38 39 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 40 help in getting this routine ironed out. 41 42 The major drawback to this routine is that if info->fill is 43 not large enough it fails rather than allocating more space; 44 this can be fixed by hacking/improving the f2c version of 45 Yousef Saad's code. 46 47 ------------------------------------------------------------ 48 */ 49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 50 { 51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 52 PetscFunctionBegin; 53 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 54 You can obtain the drop tolerance routines by installing PETSc from\n\ 55 www.mcs.anl.gov/petsc\n"); 56 #else 57 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 58 IS iscolf,isicol,isirow; 59 PetscTruth reorder; 60 PetscErrorCode ierr,sierr; 61 PetscInt *c,*r,*ic,i,n = A->m; 62 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63 PetscInt *ordcol,*iwk,*iperm,*jw; 64 PetscInt jmax,lfill,job,*o_i,*o_j; 65 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66 PetscReal af; 67 68 PetscFunctionBegin; 69 70 if (info->dt == PETSC_DEFAULT) info->dt = .005; 71 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 72 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 73 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 74 lfill = (PetscInt)(info->dtcount/2.0); 75 jmax = (PetscInt)(info->fill*a->nz); 76 77 78 /* ------------------------------------------------------------ 79 If reorder=.TRUE., then the original matrix has to be 80 reordered to reflect the user selected ordering scheme, and 81 then de-reordered so it is in it's original format. 82 Because Saad's dperm() is NOT in place, we have to copy 83 the original matrix and allocate more storage. . . 84 ------------------------------------------------------------ 85 */ 86 87 /* set reorder to true if either isrow or iscol is not identity */ 88 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 89 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 90 reorder = PetscNot(reorder); 91 92 93 /* storage for ilu factor */ 94 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 95 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 97 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 for (i=old_i[0];i<old_i[n];i++) { 104 old_j[i]++; 105 } 106 for(i=0;i<n+1;i++) { 107 old_i[i]++; 108 }; 109 110 111 if (reorder) { 112 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 113 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 114 for(i=0;i<n;i++) { 115 r[i] = r[i]+1; 116 c[i] = c[i]+1; 117 } 118 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 119 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 121 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 122 for (i=0;i<n;i++) { 123 r[i] = r[i]-1; 124 c[i] = c[i]-1; 125 } 126 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 127 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 128 o_a = old_a2; 129 o_j = old_j2; 130 o_i = old_i2; 131 } else { 132 o_a = old_a; 133 o_j = old_j; 134 o_i = old_i; 135 } 136 137 /* ------------------------------------------------------------ 138 Call Saad's ilutp() routine to generate the factorization 139 ------------------------------------------------------------ 140 */ 141 142 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 144 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 145 146 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 147 if (sierr) { 148 switch (sierr) { 149 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 150 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 152 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 153 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 154 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 155 } 156 } 157 158 ierr = PetscFree(w);CHKERRQ(ierr); 159 ierr = PetscFree(jw);CHKERRQ(ierr); 160 161 /* ------------------------------------------------------------ 162 Saad's routine gives the result in Modified Sparse Row (msr) 163 Convert to Compressed Sparse Row format (csr) 164 ------------------------------------------------------------ 165 */ 166 167 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 168 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 169 170 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 171 172 ierr = PetscFree(iwk);CHKERRQ(ierr); 173 ierr = PetscFree(wk);CHKERRQ(ierr); 174 175 if (reorder) { 176 ierr = PetscFree(old_a2);CHKERRQ(ierr); 177 ierr = PetscFree(old_j2);CHKERRQ(ierr); 178 ierr = PetscFree(old_i2);CHKERRQ(ierr); 179 } else { 180 /* fix permutation of old_j that the factorization introduced */ 181 for (i=old_i[0]; i<old_i[n]; i++) { 182 old_j[i-1] = iperm[old_j[i-1]-1]; 183 } 184 } 185 186 /* get rid of the shift to indices starting at 1 */ 187 for (i=0; i<n+1; i++) { 188 old_i[i]--; 189 } 190 for (i=old_i[0];i<old_i[n];i++) { 191 old_j[i]--; 192 } 193 194 /* Make the factored matrix 0-based */ 195 for (i=0; i<n+1; i++) { 196 new_i[i]--; 197 } 198 for (i=new_i[0];i<new_i[n];i++) { 199 new_j[i]--; 200 } 201 202 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 203 /*-- permute the right-hand-side and solution vectors --*/ 204 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 205 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 206 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 207 for(i=0; i<n; i++) { 208 ordcol[i] = ic[iperm[i]-1]; 209 }; 210 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 211 ierr = ISDestroy(isicol);CHKERRQ(ierr); 212 213 ierr = PetscFree(iperm);CHKERRQ(ierr); 214 215 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 216 ierr = PetscFree(ordcol);CHKERRQ(ierr); 217 218 /*----- put together the new matrix -----*/ 219 220 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 221 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 222 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 223 (*fact)->factor = FACTOR_LU; 224 (*fact)->assembled = PETSC_TRUE; 225 226 b = (Mat_SeqAIJ*)(*fact)->data; 227 ierr = PetscFree(b->imax);CHKERRQ(ierr); 228 b->sorted = PETSC_FALSE; 229 b->singlemalloc = PETSC_FALSE; 230 /* the next line frees the default space generated by the MatCreate() */ 231 ierr = PetscFree(b->a);CHKERRQ(ierr); 232 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 247 248 /* check out for identical nodes. If found, use inode functions */ 249 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 250 251 af = ((double)b->nz)/((double)a->nz) + .001; 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 /* 262 Factorization code for AIJ format. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 267 { 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 269 IS isicol; 270 PetscErrorCode ierr; 271 PetscInt *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j; 272 PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 273 PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im; 274 PetscReal f; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 284 ainew[0] = 0; 285 /* don't know how many column pointers are needed so estimate */ 286 f = info->fill; 287 jmax = (PetscInt)(f*ai[n]+1); 288 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 289 /* fill is a linked list of nonzeros in active row */ 290 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 291 im = fill + n + 1; 292 /* idnew is location of diagonal in factor */ 293 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr); 294 idnew[0] = 0; 295 296 for (i=0; i<n; i++) { 297 /* first copy previous fill into linked list */ 298 nnz = nz = ai[r[i]+1] - ai[r[i]]; 299 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 300 ajtmp = aj + ai[r[i]]; 301 fill[n] = n; 302 while (nz--) { 303 fm = n; 304 idx = ic[*ajtmp++]; 305 do { 306 m = fm; 307 fm = fill[m]; 308 } while (fm < idx); 309 fill[m] = idx; 310 fill[idx] = fm; 311 } 312 row = fill[n]; 313 while (row < i) { 314 ajtmp = ajnew + idnew[row] + 1; 315 nzbd = 1 + idnew[row] - ainew[row]; 316 nz = im[row] - nzbd; 317 fm = row; 318 while (nz-- > 0) { 319 idx = *ajtmp++ ; 320 nzbd++; 321 if (idx == i) im[row] = nzbd; 322 do { 323 m = fm; 324 fm = fill[m]; 325 } while (fm < idx); 326 if (fm != idx) { 327 fill[m] = idx; 328 fill[idx] = fm; 329 fm = idx; 330 nnz++; 331 } 332 } 333 row = fill[row]; 334 } 335 /* copy new filled row into permanent storage */ 336 ainew[i+1] = ainew[i] + nnz; 337 if (ainew[i+1] > jmax) { 338 /* estimate how much additional space we will need */ 339 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 340 /* just double the memory each time */ 341 PetscInt maxadd = jmax; 342 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 343 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 344 jmax += maxadd; 345 346 /* allocate a longer ajnew */ 347 ierr = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 348 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr); 349 ierr = PetscFree(ajnew);CHKERRQ(ierr); 350 ajnew = ajtmp; 351 reallocs++; /* count how many times we realloc */ 352 } 353 ajtmp = ajnew + ainew[i]; 354 fm = fill[n]; 355 nzi = 0; 356 im[i] = nnz; 357 while (nnz--) { 358 if (fm < i) nzi++; 359 *ajtmp++ = fm ; 360 fm = fill[fm]; 361 } 362 idnew[i] = ainew[i] + nzi; 363 } 364 if (ai[n] != 0) { 365 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 366 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 370 } else { 371 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 372 } 373 374 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 375 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 376 377 ierr = PetscFree(fill);CHKERRQ(ierr); 378 379 /* put together the new matrix */ 380 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 381 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 382 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 383 PetscLogObjectParent(*B,isicol); 384 b = (Mat_SeqAIJ*)(*B)->data; 385 ierr = PetscFree(b->imax);CHKERRQ(ierr); 386 b->singlemalloc = PETSC_FALSE; 387 /* the next line frees the default space generated by the Create() */ 388 ierr = PetscFree(b->a);CHKERRQ(ierr); 389 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 390 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 391 b->j = ajnew; 392 b->i = ainew; 393 b->diag = idnew; 394 b->ilen = 0; 395 b->imax = 0; 396 b->row = isrow; 397 b->col = iscol; 398 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 399 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 400 b->icol = isicol; 401 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 402 /* In b structure: Free imax, ilen, old a, old j. 403 Allocate idnew, solve_work, new a, new j */ 404 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar))); 405 b->maxnz = b->nz = ainew[n] ; 406 407 (*B)->factor = FACTOR_LU; 408 (*B)->info.factor_mallocs = reallocs; 409 (*B)->info.fill_ratio_given = f; 410 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 411 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 412 413 if (ai[n] != 0) { 414 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 415 } else { 416 (*B)->info.fill_ratio_needed = 0.0; 417 } 418 PetscFunctionReturn(0); 419 } 420 /* ----------------------------------------------------------- */ 421 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 422 423 #include "src/ksp/pc/impls/factor/factor.h" 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 426 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 427 { 428 Mat C=*B; 429 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 430 IS isrow = b->row,isicol = b->icol; 431 PetscErrorCode ierr; 432 PetscInt *r,*ic,i,j,n=A->m,*ai=b->i,*aj=b->j; 433 PetscInt *ajtmpold,*ajtmp,nz,row,*ics; 434 PetscInt *diag_offset = b->diag,diag,*pj; 435 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 436 PetscReal zeropivot,rs,d,shift_nonzero; 437 PetscReal row_shift,shift_top=0.; 438 PetscTruth shift_pd; 439 Shift_Ctx sctx; 440 PetscInt newshift; 441 442 PetscFunctionBegin; 443 shift_nonzero = info->shiftnz; 444 shift_pd = info->shiftpd; 445 zeropivot = info->zeropivot; 446 447 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 448 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 449 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 450 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 451 rtmps = rtmp; ics = ic; 452 453 if (!a->diag) { 454 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 455 } 456 /* if both shift schemes are chosen by user, only use shift_pd */ 457 if (shift_pd && shift_nonzero) shift_nonzero = 0.0; 458 if (shift_pd) { /* set shift_top=max{row_shift} */ 459 PetscInt *aai = a->i,*ddiag = a->diag; 460 shift_top = 0; 461 for (i=0; i<n; i++) { 462 d = PetscAbsScalar((a->a)[ddiag[i]]); 463 /* calculate amt of shift needed for this row */ 464 if (d<=0) { 465 row_shift = 0; 466 } else { 467 row_shift = -2*d; 468 } 469 v = a->a+aai[i]; 470 nz = aai[i+1] - aai[i]; 471 for (j=0; j<nz; j++) 472 row_shift += PetscAbsScalar(v[j]); 473 if (row_shift>shift_top) shift_top = row_shift; 474 } 475 } 476 477 sctx.shift_amount = 0; 478 sctx.shift_top = shift_top; 479 sctx.nshift = 0; 480 sctx.nshift_max = 5; 481 sctx.shift_lo = 0.; 482 sctx.shift_hi = 1.; 483 do { 484 sctx.lushift = PETSC_FALSE; 485 for (i=0; i<n; i++){ 486 nz = ai[i+1] - ai[i]; 487 ajtmp = aj + ai[i]; 488 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 489 490 /* load in initial (unfactored row) */ 491 nz = a->i[r[i]+1] - a->i[r[i]]; 492 ajtmpold = a->j + a->i[r[i]]; 493 v = a->a + a->i[r[i]]; 494 for (j=0; j<nz; j++) { 495 rtmp[ics[ajtmpold[j]]] = v[j]; 496 } 497 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 498 499 row = *ajtmp++; 500 while (row < i) { 501 pc = rtmp + row; 502 if (*pc != 0.0) { 503 pv = b->a + diag_offset[row]; 504 pj = b->j + diag_offset[row] + 1; 505 multiplier = *pc / *pv++; 506 *pc = multiplier; 507 nz = ai[row+1] - diag_offset[row] - 1; 508 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 509 PetscLogFlops(2*nz); 510 } 511 row = *ajtmp++; 512 } 513 /* finished row so stick it into b->a */ 514 pv = b->a + ai[i] ; 515 pj = b->j + ai[i] ; 516 nz = ai[i+1] - ai[i]; 517 diag = diag_offset[i] - ai[i]; 518 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 519 rs = 0.0; 520 for (j=0; j<nz; j++) { 521 pv[j] = rtmps[pj[j]]; 522 if (j != diag) rs += PetscAbsScalar(pv[j]); 523 } 524 525 sctx.rs = rs; 526 sctx.pv = pv[diag]; 527 newshift = 0; 528 ierr = PCLUFactorCheckShift(A,info,B,&sctx,&newshift);CHKERRQ(ierr); 529 if (newshift == 1){ 530 break; 531 } else if (newshift == -1){ 532 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 533 } 534 } 535 536 if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 537 /* 538 * if no shift in this attempt & shifting & started shifting & can refine, 539 * then try lower shift 540 */ 541 sctx.shift_hi = info->shift_fraction; 542 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 543 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 544 sctx.lushift = PETSC_TRUE; 545 sctx.nshift++; 546 } 547 } while (sctx.lushift); 548 549 /* invert diagonal entries for simplier triangular solves */ 550 for (i=0; i<n; i++) { 551 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 552 } 553 554 ierr = PetscFree(rtmp);CHKERRQ(ierr); 555 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 556 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 557 C->factor = FACTOR_LU; 558 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 559 C->assembled = PETSC_TRUE; 560 PetscLogFlops(C->n); 561 if (sctx.nshift){ 562 if (shift_nonzero) { 563 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 564 } else if (shift_pd) { 565 b->lu_shift_fraction = info->shift_fraction; 566 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",info->shift_fraction,shift_top,sctx.nshift); 567 } 568 } 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 574 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 575 { 576 PetscFunctionBegin; 577 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 578 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 579 PetscFunctionReturn(0); 580 } 581 582 583 /* ----------------------------------------------------------- */ 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatLUFactor_SeqAIJ" 586 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 587 { 588 PetscErrorCode ierr; 589 Mat C; 590 591 PetscFunctionBegin; 592 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 593 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 594 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 595 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 596 PetscFunctionReturn(0); 597 } 598 /* ----------------------------------------------------------- */ 599 #undef __FUNCT__ 600 #define __FUNCT__ "MatSolve_SeqAIJ" 601 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 602 { 603 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 604 IS iscol = a->col,isrow = a->row; 605 PetscErrorCode ierr; 606 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 607 PetscInt nz,*rout,*cout; 608 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 609 610 PetscFunctionBegin; 611 if (!n) PetscFunctionReturn(0); 612 613 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 614 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 615 tmp = a->solve_work; 616 617 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 618 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 619 620 /* forward solve the lower triangular */ 621 tmp[0] = b[*r++]; 622 tmps = tmp; 623 for (i=1; i<n; i++) { 624 v = aa + ai[i] ; 625 vi = aj + ai[i] ; 626 nz = a->diag[i] - ai[i]; 627 sum = b[*r++]; 628 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 629 tmp[i] = sum; 630 } 631 632 /* backward solve the upper triangular */ 633 for (i=n-1; i>=0; i--){ 634 v = aa + a->diag[i] + 1; 635 vi = aj + a->diag[i] + 1; 636 nz = ai[i+1] - a->diag[i] - 1; 637 sum = tmp[i]; 638 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 639 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 640 } 641 642 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 643 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 644 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 645 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 646 PetscLogFlops(2*a->nz - A->n); 647 PetscFunctionReturn(0); 648 } 649 650 /* ----------------------------------------------------------- */ 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 653 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 654 { 655 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 656 PetscErrorCode ierr; 657 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 658 PetscScalar *x,*b,*aa = a->a; 659 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 660 PetscInt adiag_i,i,*vi,nz,ai_i; 661 PetscScalar *v,sum; 662 #endif 663 664 PetscFunctionBegin; 665 if (!n) PetscFunctionReturn(0); 666 667 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 668 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 669 670 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 671 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 672 #else 673 /* forward solve the lower triangular */ 674 x[0] = b[0]; 675 for (i=1; i<n; i++) { 676 ai_i = ai[i]; 677 v = aa + ai_i; 678 vi = aj + ai_i; 679 nz = adiag[i] - ai_i; 680 sum = b[i]; 681 while (nz--) sum -= *v++ * x[*vi++]; 682 x[i] = sum; 683 } 684 685 /* backward solve the upper triangular */ 686 for (i=n-1; i>=0; i--){ 687 adiag_i = adiag[i]; 688 v = aa + adiag_i + 1; 689 vi = aj + adiag_i + 1; 690 nz = ai[i+1] - adiag_i - 1; 691 sum = x[i]; 692 while (nz--) sum -= *v++ * x[*vi++]; 693 x[i] = sum*aa[adiag_i]; 694 } 695 #endif 696 PetscLogFlops(2*a->nz - A->n); 697 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 698 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 699 PetscFunctionReturn(0); 700 } 701 702 #undef __FUNCT__ 703 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 704 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 705 { 706 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 707 IS iscol = a->col,isrow = a->row; 708 PetscErrorCode ierr; 709 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 710 PetscInt nz,*rout,*cout; 711 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 712 713 PetscFunctionBegin; 714 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 715 716 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 717 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 718 tmp = a->solve_work; 719 720 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 721 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 722 723 /* forward solve the lower triangular */ 724 tmp[0] = b[*r++]; 725 for (i=1; i<n; i++) { 726 v = aa + ai[i] ; 727 vi = aj + ai[i] ; 728 nz = a->diag[i] - ai[i]; 729 sum = b[*r++]; 730 while (nz--) sum -= *v++ * tmp[*vi++ ]; 731 tmp[i] = sum; 732 } 733 734 /* backward solve the upper triangular */ 735 for (i=n-1; i>=0; i--){ 736 v = aa + a->diag[i] + 1; 737 vi = aj + a->diag[i] + 1; 738 nz = ai[i+1] - a->diag[i] - 1; 739 sum = tmp[i]; 740 while (nz--) sum -= *v++ * tmp[*vi++ ]; 741 tmp[i] = sum*aa[a->diag[i]]; 742 x[*c--] += tmp[i]; 743 } 744 745 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 746 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 747 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 748 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 749 PetscLogFlops(2*a->nz); 750 751 PetscFunctionReturn(0); 752 } 753 /* -------------------------------------------------------------------*/ 754 #undef __FUNCT__ 755 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 756 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 757 { 758 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 759 IS iscol = a->col,isrow = a->row; 760 PetscErrorCode ierr; 761 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 762 PetscInt nz,*rout,*cout,*diag = a->diag; 763 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 764 765 PetscFunctionBegin; 766 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 767 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 768 tmp = a->solve_work; 769 770 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 771 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 772 773 /* copy the b into temp work space according to permutation */ 774 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 775 776 /* forward solve the U^T */ 777 for (i=0; i<n; i++) { 778 v = aa + diag[i] ; 779 vi = aj + diag[i] + 1; 780 nz = ai[i+1] - diag[i] - 1; 781 s1 = tmp[i]; 782 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 783 while (nz--) { 784 tmp[*vi++ ] -= (*v++)*s1; 785 } 786 tmp[i] = s1; 787 } 788 789 /* backward solve the L^T */ 790 for (i=n-1; i>=0; i--){ 791 v = aa + diag[i] - 1 ; 792 vi = aj + diag[i] - 1 ; 793 nz = diag[i] - ai[i]; 794 s1 = tmp[i]; 795 while (nz--) { 796 tmp[*vi-- ] -= (*v--)*s1; 797 } 798 } 799 800 /* copy tmp into x according to permutation */ 801 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 802 803 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 804 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 805 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 806 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 807 808 PetscLogFlops(2*a->nz-A->n); 809 PetscFunctionReturn(0); 810 } 811 812 #undef __FUNCT__ 813 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 814 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 815 { 816 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 817 IS iscol = a->col,isrow = a->row; 818 PetscErrorCode ierr; 819 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 820 PetscInt nz,*rout,*cout,*diag = a->diag; 821 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 822 823 PetscFunctionBegin; 824 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 825 826 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 827 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 828 tmp = a->solve_work; 829 830 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 831 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 832 833 /* copy the b into temp work space according to permutation */ 834 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 835 836 /* forward solve the U^T */ 837 for (i=0; i<n; i++) { 838 v = aa + diag[i] ; 839 vi = aj + diag[i] + 1; 840 nz = ai[i+1] - diag[i] - 1; 841 tmp[i] *= *v++; 842 while (nz--) { 843 tmp[*vi++ ] -= (*v++)*tmp[i]; 844 } 845 } 846 847 /* backward solve the L^T */ 848 for (i=n-1; i>=0; i--){ 849 v = aa + diag[i] - 1 ; 850 vi = aj + diag[i] - 1 ; 851 nz = diag[i] - ai[i]; 852 while (nz--) { 853 tmp[*vi-- ] -= (*v--)*tmp[i]; 854 } 855 } 856 857 /* copy tmp into x according to permutation */ 858 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 859 860 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 861 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 862 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 863 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 864 865 PetscLogFlops(2*a->nz); 866 PetscFunctionReturn(0); 867 } 868 /* ----------------------------------------------------------------*/ 869 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 870 871 #undef __FUNCT__ 872 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 873 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 874 { 875 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 876 IS isicol; 877 PetscErrorCode ierr; 878 PetscInt *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j; 879 PetscInt *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 880 PetscInt *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0; 881 PetscInt incrlev,nnz,i,levels,diagonal_fill; 882 PetscTruth col_identity,row_identity; 883 PetscReal f; 884 885 PetscFunctionBegin; 886 f = info->fill; 887 levels = (PetscInt)info->levels; 888 diagonal_fill = (PetscInt)info->diagonal_fill; 889 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 890 891 /* special case that simply copies fill pattern */ 892 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 893 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 894 if (!levels && row_identity && col_identity) { 895 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 896 (*fact)->factor = FACTOR_LU; 897 b = (Mat_SeqAIJ*)(*fact)->data; 898 if (!b->diag) { 899 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 900 } 901 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 902 b->row = isrow; 903 b->col = iscol; 904 b->icol = isicol; 905 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 906 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 907 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 908 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 909 PetscFunctionReturn(0); 910 } 911 912 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 913 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 914 915 /* get new row pointers */ 916 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 917 ainew[0] = 0; 918 /* don't know how many column pointers are needed so estimate */ 919 jmax = (PetscInt)(f*(ai[n]+1)); 920 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 921 /* ajfill is level of fill for each fill entry */ 922 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr); 923 /* fill is a linked list of nonzeros in active row */ 924 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 925 /* im is level for each filled value */ 926 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 927 /* dloc is location of diagonal in factor */ 928 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr); 929 dloc[0] = 0; 930 for (prow=0; prow<n; prow++) { 931 932 /* copy current row into linked list */ 933 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 934 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 935 xi = aj + ai[r[prow]]; 936 fill[n] = n; 937 fill[prow] = -1; /* marker to indicate if diagonal exists */ 938 while (nz--) { 939 fm = n; 940 idx = ic[*xi++ ]; 941 do { 942 m = fm; 943 fm = fill[m]; 944 } while (fm < idx); 945 fill[m] = idx; 946 fill[idx] = fm; 947 im[idx] = 0; 948 } 949 950 /* make sure diagonal entry is included */ 951 if (diagonal_fill && fill[prow] == -1) { 952 fm = n; 953 while (fill[fm] < prow) fm = fill[fm]; 954 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 955 fill[fm] = prow; 956 im[prow] = 0; 957 nzf++; 958 dcount++; 959 } 960 961 nzi = 0; 962 row = fill[n]; 963 while (row < prow) { 964 incrlev = im[row] + 1; 965 nz = dloc[row]; 966 xi = ajnew + ainew[row] + nz + 1; 967 flev = ajfill + ainew[row] + nz + 1; 968 nnz = ainew[row+1] - ainew[row] - nz - 1; 969 fm = row; 970 while (nnz-- > 0) { 971 idx = *xi++ ; 972 if (*flev + incrlev > levels) { 973 flev++; 974 continue; 975 } 976 do { 977 m = fm; 978 fm = fill[m]; 979 } while (fm < idx); 980 if (fm != idx) { 981 im[idx] = *flev + incrlev; 982 fill[m] = idx; 983 fill[idx] = fm; 984 fm = idx; 985 nzf++; 986 } else { 987 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 988 } 989 flev++; 990 } 991 row = fill[row]; 992 nzi++; 993 } 994 /* copy new filled row into permanent storage */ 995 ainew[prow+1] = ainew[prow] + nzf; 996 if (ainew[prow+1] > jmax) { 997 998 /* estimate how much additional space we will need */ 999 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1000 /* just double the memory each time */ 1001 /* maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1002 PetscInt maxadd = jmax; 1003 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1004 jmax += maxadd; 1005 1006 /* allocate a longer ajnew and ajfill */ 1007 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1008 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1009 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1010 ajnew = xi; 1011 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1012 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1013 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1014 ajfill = xi; 1015 reallocs++; /* count how many times we realloc */ 1016 } 1017 xi = ajnew + ainew[prow] ; 1018 flev = ajfill + ainew[prow] ; 1019 dloc[prow] = nzi; 1020 fm = fill[n]; 1021 while (nzf--) { 1022 *xi++ = fm ; 1023 *flev++ = im[fm]; 1024 fm = fill[fm]; 1025 } 1026 /* make sure row has diagonal entry */ 1027 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1028 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1029 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1030 } 1031 } 1032 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1033 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1034 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1035 ierr = PetscFree(fill);CHKERRQ(ierr); 1036 ierr = PetscFree(im);CHKERRQ(ierr); 1037 1038 { 1039 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1040 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1041 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1042 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1043 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1044 if (diagonal_fill) { 1045 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1046 } 1047 } 1048 1049 /* put together the new matrix */ 1050 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1051 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1052 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1053 PetscLogObjectParent(*fact,isicol); 1054 b = (Mat_SeqAIJ*)(*fact)->data; 1055 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1056 b->singlemalloc = PETSC_FALSE; 1057 len = (ainew[n] )*sizeof(PetscScalar); 1058 /* the next line frees the default space generated by the Create() */ 1059 ierr = PetscFree(b->a);CHKERRQ(ierr); 1060 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1061 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1062 b->j = ajnew; 1063 b->i = ainew; 1064 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1065 b->diag = dloc; 1066 b->ilen = 0; 1067 b->imax = 0; 1068 b->row = isrow; 1069 b->col = iscol; 1070 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1071 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1072 b->icol = isicol; 1073 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1074 /* In b structure: Free imax, ilen, old a, old j. 1075 Allocate dloc, solve_work, new a, new j */ 1076 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar))); 1077 b->maxnz = b->nz = ainew[n] ; 1078 (*fact)->factor = FACTOR_LU; 1079 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1080 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1081 (*fact)->info.factor_mallocs = reallocs; 1082 (*fact)->info.fill_ratio_given = f; 1083 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #include "src/mat/impls/sbaij/seq/sbaij.h" 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1090 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1091 { 1092 Mat C = *B; 1093 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1094 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1095 IS ip=b->row; 1096 PetscErrorCode ierr; 1097 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1098 PetscInt *ai=a->i,*aj=a->j; 1099 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1100 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1101 PetscReal zeropivot,shift_amount,rs,shift_nonzero; 1102 PetscTruth chshift,shift_pd; 1103 PetscInt nshift=0; 1104 1105 PetscFunctionBegin; 1106 shift_nonzero = info->shiftnz; 1107 shift_pd = info->shiftpd; 1108 zeropivot = info->zeropivot; 1109 1110 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1111 1112 /* initialization */ 1113 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1114 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1115 jl = il + mbs; 1116 rtmp = (MatScalar*)(jl + mbs); 1117 1118 shift_amount = 0; 1119 do { 1120 chshift = PETSC_FALSE; 1121 for (i=0; i<mbs; i++) { 1122 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1123 } 1124 1125 for (k = 0; k<mbs; k++){ 1126 bval = ba + bi[k]; 1127 /* initialize k-th row by the perm[k]-th row of A */ 1128 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1129 for (j = jmin; j < jmax; j++){ 1130 col = rip[aj[j]]; 1131 if (col >= k){ /* only take upper triangular entry */ 1132 rtmp[col] = aa[j]; 1133 *bval++ = 0.0; /* for in-place factorization */ 1134 } 1135 } 1136 /* shift the diagonal of the matrix */ 1137 if (nshift) rtmp[k] += shift_amount; 1138 1139 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1140 dk = rtmp[k]; 1141 i = jl[k]; /* first row to be added to k_th row */ 1142 1143 while (i < k){ 1144 nexti = jl[i]; /* next row to be added to k_th row */ 1145 1146 /* compute multiplier, update diag(k) and U(i,k) */ 1147 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1148 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1149 dk += uikdi*ba[ili]; 1150 ba[ili] = uikdi; /* -U(i,k) */ 1151 1152 /* add multiple of row i to k-th row */ 1153 jmin = ili + 1; jmax = bi[i+1]; 1154 if (jmin < jmax){ 1155 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1156 /* update il and jl for row i */ 1157 il[i] = jmin; 1158 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1159 } 1160 i = nexti; 1161 } 1162 1163 /* shift the diagonals when zero pivot is detected */ 1164 /* compute rs=sum of abs(off-diagonal) */ 1165 rs = 0.0; 1166 jmin = bi[k]+1; 1167 nz = bi[k+1] - jmin; 1168 if (nz){ 1169 bcol = bj + jmin; 1170 while (nz--){ 1171 rs += PetscAbsScalar(rtmp[*bcol++]); 1172 } 1173 } 1174 if (PetscAbsScalar(dk) <= zeropivot*rs && shift_nonzero){ 1175 /* force |diag(*B)| > zeropivot*rs */ 1176 if (!nshift){ 1177 shift_amount = shift_nonzero; 1178 } else { 1179 shift_amount *= 2.0; 1180 } 1181 chshift = PETSC_TRUE; 1182 nshift++; 1183 break; 1184 } else if (PetscRealPart(dk) <= zeropivot*rs && shift_pd){ 1185 /* calculate a shift that would make this row diagonally dominant */ 1186 shift_amount = PetscMax(rs+PetscAbs(PetscRealPart(dk)),1.1*shift_amount); 1187 chshift = PETSC_TRUE; 1188 /* Unlike in the ILU case there is no exit condition on nshift: 1189 we increase the shift until it converges. There is no guarantee that 1190 this algorithm converges faster or slower, or is better or worse 1191 than the ILU algorithm. */ 1192 nshift++; 1193 break; 1194 } else if (PetscAbsScalar(dk) <= zeropivot*rs){ 1195 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1196 } 1197 1198 /* copy data into U(k,:) */ 1199 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1200 jmin = bi[k]+1; jmax = bi[k+1]; 1201 if (jmin < jmax) { 1202 for (j=jmin; j<jmax; j++){ 1203 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1204 } 1205 /* add the k-th row into il and jl */ 1206 il[k] = jmin; 1207 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1208 } 1209 } 1210 } while (chshift); 1211 ierr = PetscFree(il);CHKERRQ(ierr); 1212 1213 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1214 C->factor = FACTOR_CHOLESKY; 1215 C->assembled = PETSC_TRUE; 1216 C->preallocated = PETSC_TRUE; 1217 PetscLogFlops(C->m); 1218 if (nshift){ 1219 if (shift_nonzero) { 1220 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",nshift,shift_amount); 1221 } else if (shift_pd) { 1222 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g\n",nshift,shift_amount); 1223 } 1224 } 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #include "petscbt.h" 1229 #include "src/mat/utils/freespace.h" 1230 #undef __FUNCT__ 1231 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1232 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1233 { 1234 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1235 Mat_SeqSBAIJ *b; 1236 Mat B; 1237 PetscErrorCode ierr; 1238 PetscTruth perm_identity; 1239 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1240 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1241 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1242 PetscInt ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1243 PetscReal fill=info->fill,levels=info->levels; 1244 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1245 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1246 PetscBT lnkbt; 1247 1248 PetscFunctionBegin; 1249 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1250 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1251 1252 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1253 ui[0] = 0; 1254 1255 /* special case that simply copies fill pattern */ 1256 if (!levels && perm_identity) { 1257 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1258 for (i=0; i<am; i++) { 1259 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1260 } 1261 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1262 cols = uj; 1263 for (i=0; i<am; i++) { 1264 aj = a->j + a->diag[i]; 1265 ncols = ui[i+1] - ui[i]; 1266 for (j=0; j<ncols; j++) *cols++ = *aj++; 1267 } 1268 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1269 /* initialization */ 1270 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 1271 1272 /* jl: linked list for storing indices of the pivot rows 1273 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1274 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1275 il = jl + am; 1276 uj_ptr = (PetscInt**)(il + am); 1277 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1278 for (i=0; i<am; i++){ 1279 jl[i] = am; il[i] = 0; 1280 } 1281 1282 /* create and initialize a linked list for storing column indices of the active row k */ 1283 nlnk = am + 1; 1284 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1285 1286 /* initial FreeSpace size is fill*(ai[am]+1) */ 1287 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1288 current_space = free_space; 1289 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1290 current_space_lvl = free_space_lvl; 1291 1292 for (k=0; k<am; k++){ /* for each active row k */ 1293 /* initialize lnk by the column indices of row rip[k] of A */ 1294 nzk = 0; 1295 ncols = ai[rip[k]+1] - ai[rip[k]]; 1296 ncols_upper = 0; 1297 cols = cols_lvl + am; 1298 for (j=0; j<ncols; j++){ 1299 i = rip[*(aj + ai[rip[k]] + j)]; 1300 if (i >= k){ /* only take upper triangular entry */ 1301 cols[ncols_upper] = i; 1302 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 1303 ncols_upper++; 1304 } 1305 } 1306 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1307 nzk += nlnk; 1308 1309 /* update lnk by computing fill-in for each pivot row to be merged in */ 1310 prow = jl[k]; /* 1st pivot row */ 1311 1312 while (prow < k){ 1313 nextprow = jl[prow]; 1314 1315 /* merge prow into k-th row */ 1316 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1317 jmax = ui[prow+1]; 1318 ncols = jmax-jmin; 1319 i = jmin - ui[prow]; 1320 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1321 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1322 ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1323 nzk += nlnk; 1324 1325 /* update il and jl for prow */ 1326 if (jmin < jmax){ 1327 il[prow] = jmin; 1328 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1329 } 1330 prow = nextprow; 1331 } 1332 1333 /* if free space is not available, make more free space */ 1334 if (current_space->local_remaining<nzk) { 1335 i = am - k + 1; /* num of unfactored rows */ 1336 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1337 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1338 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1339 reallocs++; 1340 } 1341 1342 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1343 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1344 1345 /* add the k-th row into il and jl */ 1346 if (nzk > 1){ 1347 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1348 jl[k] = jl[i]; jl[i] = k; 1349 il[k] = ui[k] + 1; 1350 } 1351 uj_ptr[k] = current_space->array; 1352 uj_lvl_ptr[k] = current_space_lvl->array; 1353 1354 current_space->array += nzk; 1355 current_space->local_used += nzk; 1356 current_space->local_remaining -= nzk; 1357 1358 current_space_lvl->array += nzk; 1359 current_space_lvl->local_used += nzk; 1360 current_space_lvl->local_remaining -= nzk; 1361 1362 ui[k+1] = ui[k] + nzk; 1363 } 1364 1365 if (ai[am] != 0) { 1366 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1367 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1368 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1369 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1370 } else { 1371 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1372 } 1373 1374 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1375 ierr = PetscFree(jl);CHKERRQ(ierr); 1376 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1377 1378 /* destroy list of free space and other temporary array(s) */ 1379 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1380 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1381 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1382 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1383 1384 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1385 1386 /* put together the new matrix in MATSEQSBAIJ format */ 1387 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1388 B = *fact; 1389 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1390 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1391 1392 b = (Mat_SeqSBAIJ*)B->data; 1393 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1394 b->singlemalloc = PETSC_FALSE; 1395 /* the next line frees the default space generated by the Create() */ 1396 ierr = PetscFree(b->a);CHKERRQ(ierr); 1397 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1398 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1399 b->j = uj; 1400 b->i = ui; 1401 b->diag = 0; 1402 b->ilen = 0; 1403 b->imax = 0; 1404 b->row = perm; 1405 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1406 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1407 b->icol = perm; 1408 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1409 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1410 PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 1411 b->maxnz = b->nz = ui[am]; 1412 1413 B->factor = FACTOR_CHOLESKY; 1414 B->info.factor_mallocs = reallocs; 1415 B->info.fill_ratio_given = fill; 1416 if (ai[am] != 0) { 1417 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1418 } else { 1419 B->info.fill_ratio_needed = 0.0; 1420 } 1421 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1422 if (perm_identity){ 1423 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1424 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1425 } 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1431 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1432 { 1433 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1434 Mat_SeqSBAIJ *b; 1435 Mat B; 1436 PetscErrorCode ierr; 1437 PetscTruth perm_identity; 1438 PetscReal fill = info->fill; 1439 PetscInt *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1440 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1441 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1442 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1443 PetscBT lnkbt; 1444 IS iperm; 1445 1446 PetscFunctionBegin; 1447 /* check whether perm is the identity mapping */ 1448 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1449 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1450 1451 if (!perm_identity){ 1452 /* check if perm is symmetric! */ 1453 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1454 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1455 for (i=0; i<mbs; i++) { 1456 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1457 } 1458 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1459 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1460 } 1461 1462 /* initialization */ 1463 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1464 ui[0] = 0; 1465 1466 /* jl: linked list for storing indices of the pivot rows 1467 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 1468 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1469 il = jl + mbs; 1470 cols = il + mbs; 1471 ui_ptr = (PetscInt**)(cols + mbs); 1472 for (i=0; i<mbs; i++){ 1473 jl[i] = mbs; il[i] = 0; 1474 } 1475 1476 /* create and initialize a linked list for storing column indices of the active row k */ 1477 nlnk = mbs + 1; 1478 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1479 1480 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 1481 ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 1482 current_space = free_space; 1483 1484 for (k=0; k<mbs; k++){ /* for each active row k */ 1485 /* initialize lnk by the column indices of row rip[k] of A */ 1486 nzk = 0; 1487 ncols = ai[rip[k]+1] - ai[rip[k]]; 1488 ncols_upper = 0; 1489 for (j=0; j<ncols; j++){ 1490 i = rip[*(aj + ai[rip[k]] + j)]; 1491 if (i >= k){ /* only take upper triangular entry */ 1492 cols[ncols_upper] = i; 1493 ncols_upper++; 1494 } 1495 } 1496 ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1497 nzk += nlnk; 1498 1499 /* update lnk by computing fill-in for each pivot row to be merged in */ 1500 prow = jl[k]; /* 1st pivot row */ 1501 1502 while (prow < k){ 1503 nextprow = jl[prow]; 1504 /* merge prow into k-th row */ 1505 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 1506 jmax = ui[prow+1]; 1507 ncols = jmax-jmin; 1508 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 1509 ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1510 nzk += nlnk; 1511 1512 /* update il and jl for prow */ 1513 if (jmin < jmax){ 1514 il[prow] = jmin; 1515 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1516 } 1517 prow = nextprow; 1518 } 1519 1520 /* if free space is not available, make more free space */ 1521 if (current_space->local_remaining<nzk) { 1522 i = mbs - k + 1; /* num of unfactored rows */ 1523 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1524 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1525 reallocs++; 1526 } 1527 1528 /* copy data into free space, then initialize lnk */ 1529 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1530 1531 /* add the k-th row into il and jl */ 1532 if (nzk-1 > 0){ 1533 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1534 jl[k] = jl[i]; jl[i] = k; 1535 il[k] = ui[k] + 1; 1536 } 1537 ui_ptr[k] = current_space->array; 1538 current_space->array += nzk; 1539 current_space->local_used += nzk; 1540 current_space->local_remaining -= nzk; 1541 1542 ui[k+1] = ui[k] + nzk; 1543 } 1544 1545 if (ai[mbs] != 0) { 1546 PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]); 1547 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1548 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1549 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1550 } else { 1551 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1552 } 1553 1554 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1555 ierr = PetscFree(jl);CHKERRQ(ierr); 1556 1557 /* destroy list of free space and other temporary array(s) */ 1558 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1559 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1560 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1561 1562 /* put together the new matrix in MATSEQSBAIJ format */ 1563 ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr); 1564 B = *fact; 1565 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1566 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1567 1568 b = (Mat_SeqSBAIJ*)B->data; 1569 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1570 b->singlemalloc = PETSC_FALSE; 1571 /* the next line frees the default space generated by the Create() */ 1572 ierr = PetscFree(b->a);CHKERRQ(ierr); 1573 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1574 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1575 b->j = uj; 1576 b->i = ui; 1577 b->diag = 0; 1578 b->ilen = 0; 1579 b->imax = 0; 1580 b->row = perm; 1581 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1582 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1583 b->icol = perm; 1584 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1585 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1586 PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 1587 b->maxnz = b->nz = ui[mbs]; 1588 1589 B->factor = FACTOR_CHOLESKY; 1590 B->info.factor_mallocs = reallocs; 1591 B->info.fill_ratio_given = fill; 1592 if (ai[mbs] != 0) { 1593 B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 1594 } else { 1595 B->info.fill_ratio_needed = 0.0; 1596 } 1597 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1598 if (perm_identity){ 1599 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1600 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1601 } 1602 PetscFunctionReturn(0); 1603 } 1604