1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 9 { 10 PetscFunctionBegin; 11 12 SETERRQ(PETSC_ERR_SUP,"Code not written"); 13 #if !defined(PETSC_USE_DEBUG) 14 PetscFunctionReturn(0); 15 #endif 16 } 17 18 19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 21 22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 28 /* ------------------------------------------------------------ 29 30 This interface was contribed by Tony Caola 31 32 This routine is an interface to the pivoting drop-tolerance 33 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 34 SPARSEKIT2. 35 36 The SPARSEKIT2 routines used here are covered by the GNU 37 copyright; see the file gnu in this directory. 38 39 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 40 help in getting this routine ironed out. 41 42 The major drawback to this routine is that if info->fill is 43 not large enough it fails rather than allocating more space; 44 this can be fixed by hacking/improving the f2c version of 45 Yousef Saad's code. 46 47 ------------------------------------------------------------ 48 */ 49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 50 { 51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 52 PetscFunctionBegin; 53 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 54 You can obtain the drop tolerance routines by installing PETSc from\n\ 55 www.mcs.anl.gov/petsc\n"); 56 #else 57 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 58 IS iscolf,isicol,isirow; 59 PetscTruth reorder; 60 PetscErrorCode ierr,sierr; 61 PetscInt *c,*r,*ic,i,n = A->m; 62 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63 PetscInt *ordcol,*iwk,*iperm,*jw; 64 PetscInt jmax,lfill,job,*o_i,*o_j; 65 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66 PetscReal af; 67 68 PetscFunctionBegin; 69 70 if (info->dt == PETSC_DEFAULT) info->dt = .005; 71 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 72 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 73 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 74 lfill = (PetscInt)(info->dtcount/2.0); 75 jmax = (PetscInt)(info->fill*a->nz); 76 77 78 /* ------------------------------------------------------------ 79 If reorder=.TRUE., then the original matrix has to be 80 reordered to reflect the user selected ordering scheme, and 81 then de-reordered so it is in it's original format. 82 Because Saad's dperm() is NOT in place, we have to copy 83 the original matrix and allocate more storage. . . 84 ------------------------------------------------------------ 85 */ 86 87 /* set reorder to true if either isrow or iscol is not identity */ 88 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 89 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 90 reorder = PetscNot(reorder); 91 92 93 /* storage for ilu factor */ 94 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 95 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 97 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 for (i=old_i[0];i<old_i[n];i++) { 104 old_j[i]++; 105 } 106 for(i=0;i<n+1;i++) { 107 old_i[i]++; 108 }; 109 110 111 if (reorder) { 112 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 113 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 114 for(i=0;i<n;i++) { 115 r[i] = r[i]+1; 116 c[i] = c[i]+1; 117 } 118 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 119 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 121 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 122 for (i=0;i<n;i++) { 123 r[i] = r[i]-1; 124 c[i] = c[i]-1; 125 } 126 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 127 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 128 o_a = old_a2; 129 o_j = old_j2; 130 o_i = old_i2; 131 } else { 132 o_a = old_a; 133 o_j = old_j; 134 o_i = old_i; 135 } 136 137 /* ------------------------------------------------------------ 138 Call Saad's ilutp() routine to generate the factorization 139 ------------------------------------------------------------ 140 */ 141 142 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 144 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 145 146 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 147 if (sierr) { 148 switch (sierr) { 149 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 150 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 152 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 153 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 154 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 155 } 156 } 157 158 ierr = PetscFree(w);CHKERRQ(ierr); 159 ierr = PetscFree(jw);CHKERRQ(ierr); 160 161 /* ------------------------------------------------------------ 162 Saad's routine gives the result in Modified Sparse Row (msr) 163 Convert to Compressed Sparse Row format (csr) 164 ------------------------------------------------------------ 165 */ 166 167 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 168 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 169 170 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 171 172 ierr = PetscFree(iwk);CHKERRQ(ierr); 173 ierr = PetscFree(wk);CHKERRQ(ierr); 174 175 if (reorder) { 176 ierr = PetscFree(old_a2);CHKERRQ(ierr); 177 ierr = PetscFree(old_j2);CHKERRQ(ierr); 178 ierr = PetscFree(old_i2);CHKERRQ(ierr); 179 } else { 180 /* fix permutation of old_j that the factorization introduced */ 181 for (i=old_i[0]; i<old_i[n]; i++) { 182 old_j[i-1] = iperm[old_j[i-1]-1]; 183 } 184 } 185 186 /* get rid of the shift to indices starting at 1 */ 187 for (i=0; i<n+1; i++) { 188 old_i[i]--; 189 } 190 for (i=old_i[0];i<old_i[n];i++) { 191 old_j[i]--; 192 } 193 194 /* Make the factored matrix 0-based */ 195 for (i=0; i<n+1; i++) { 196 new_i[i]--; 197 } 198 for (i=new_i[0];i<new_i[n];i++) { 199 new_j[i]--; 200 } 201 202 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 203 /*-- permute the right-hand-side and solution vectors --*/ 204 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 205 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 206 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 207 for(i=0; i<n; i++) { 208 ordcol[i] = ic[iperm[i]-1]; 209 }; 210 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 211 ierr = ISDestroy(isicol);CHKERRQ(ierr); 212 213 ierr = PetscFree(iperm);CHKERRQ(ierr); 214 215 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 216 ierr = PetscFree(ordcol);CHKERRQ(ierr); 217 218 /*----- put together the new matrix -----*/ 219 220 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 221 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 222 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 223 (*fact)->factor = FACTOR_LU; 224 (*fact)->assembled = PETSC_TRUE; 225 226 b = (Mat_SeqAIJ*)(*fact)->data; 227 ierr = PetscFree(b->imax);CHKERRQ(ierr); 228 b->sorted = PETSC_FALSE; 229 b->singlemalloc = PETSC_FALSE; 230 /* the next line frees the default space generated by the MatCreate() */ 231 ierr = PetscFree(b->a);CHKERRQ(ierr); 232 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 247 248 /* check out for identical nodes. If found, use inode functions */ 249 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 250 251 af = ((double)b->nz)/((double)a->nz) + .001; 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 /* 262 Factorization code for AIJ format. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 267 { 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 269 IS isicol; 270 PetscErrorCode ierr; 271 PetscInt *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j; 272 PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 273 PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im; 274 PetscReal f; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 284 ainew[0] = 0; 285 /* don't know how many column pointers are needed so estimate */ 286 f = info->fill; 287 jmax = (PetscInt)(f*ai[n]+1); 288 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 289 /* fill is a linked list of nonzeros in active row */ 290 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 291 im = fill + n + 1; 292 /* idnew is location of diagonal in factor */ 293 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr); 294 idnew[0] = 0; 295 296 for (i=0; i<n; i++) { 297 /* first copy previous fill into linked list */ 298 nnz = nz = ai[r[i]+1] - ai[r[i]]; 299 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 300 ajtmp = aj + ai[r[i]]; 301 fill[n] = n; 302 while (nz--) { 303 fm = n; 304 idx = ic[*ajtmp++]; 305 do { 306 m = fm; 307 fm = fill[m]; 308 } while (fm < idx); 309 fill[m] = idx; 310 fill[idx] = fm; 311 } 312 row = fill[n]; 313 while (row < i) { 314 ajtmp = ajnew + idnew[row] + 1; 315 nzbd = 1 + idnew[row] - ainew[row]; 316 nz = im[row] - nzbd; 317 fm = row; 318 while (nz-- > 0) { 319 idx = *ajtmp++ ; 320 nzbd++; 321 if (idx == i) im[row] = nzbd; 322 do { 323 m = fm; 324 fm = fill[m]; 325 } while (fm < idx); 326 if (fm != idx) { 327 fill[m] = idx; 328 fill[idx] = fm; 329 fm = idx; 330 nnz++; 331 } 332 } 333 row = fill[row]; 334 } 335 /* copy new filled row into permanent storage */ 336 ainew[i+1] = ainew[i] + nnz; 337 if (ainew[i+1] > jmax) { 338 /* estimate how much additional space we will need */ 339 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 340 /* just double the memory each time */ 341 PetscInt maxadd = jmax; 342 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 343 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 344 jmax += maxadd; 345 346 /* allocate a longer ajnew */ 347 ierr = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 348 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr); 349 ierr = PetscFree(ajnew);CHKERRQ(ierr); 350 ajnew = ajtmp; 351 reallocs++; /* count how many times we realloc */ 352 } 353 ajtmp = ajnew + ainew[i]; 354 fm = fill[n]; 355 nzi = 0; 356 im[i] = nnz; 357 while (nnz--) { 358 if (fm < i) nzi++; 359 *ajtmp++ = fm ; 360 fm = fill[fm]; 361 } 362 idnew[i] = ainew[i] + nzi; 363 } 364 if (ai[n] != 0) { 365 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 366 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 370 } else { 371 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 372 } 373 374 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 375 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 376 377 ierr = PetscFree(fill);CHKERRQ(ierr); 378 379 /* put together the new matrix */ 380 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 381 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 382 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 383 PetscLogObjectParent(*B,isicol); 384 b = (Mat_SeqAIJ*)(*B)->data; 385 ierr = PetscFree(b->imax);CHKERRQ(ierr); 386 b->singlemalloc = PETSC_FALSE; 387 /* the next line frees the default space generated by the Create() */ 388 ierr = PetscFree(b->a);CHKERRQ(ierr); 389 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 390 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 391 b->j = ajnew; 392 b->i = ainew; 393 b->diag = idnew; 394 b->ilen = 0; 395 b->imax = 0; 396 b->row = isrow; 397 b->col = iscol; 398 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 399 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 400 b->icol = isicol; 401 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 402 /* In b structure: Free imax, ilen, old a, old j. 403 Allocate idnew, solve_work, new a, new j */ 404 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar))); 405 b->maxnz = b->nz = ainew[n] ; 406 407 (*B)->factor = FACTOR_LU; 408 (*B)->info.factor_mallocs = reallocs; 409 (*B)->info.fill_ratio_given = f; 410 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 411 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 412 413 if (ai[n] != 0) { 414 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 415 } else { 416 (*B)->info.fill_ratio_needed = 0.0; 417 } 418 PetscFunctionReturn(0); 419 } 420 /* ----------------------------------------------------------- */ 421 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 422 423 #include "src/ksp/pc/impls/factor/factor.h" 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 426 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 427 { 428 Mat C=*B; 429 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 430 IS isrow = b->row,isicol = b->icol; 431 PetscErrorCode ierr; 432 PetscInt *r,*ic,i,j,n=A->m,*ai=b->i,*aj=b->j; 433 PetscInt *ajtmpold,*ajtmp,nz,row,*ics; 434 PetscInt *diag_offset = b->diag,diag,*pj; 435 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 436 PetscReal zeropivot,rs,d,shift_nonzero; 437 PetscReal row_shift,shift_top=0.; 438 PetscTruth shift_pd; 439 Shift_Ctx sctx; 440 PetscInt newshift; 441 442 PetscFunctionBegin; 443 shift_nonzero = info->shiftnz; 444 shift_pd = info->shiftpd; 445 zeropivot = info->zeropivot; 446 447 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 448 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 449 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 450 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 451 rtmps = rtmp; ics = ic; 452 453 if (!a->diag) { 454 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 455 } 456 /* if both shift schemes are chosen by user, only use shift_pd */ 457 if (shift_pd && shift_nonzero) shift_nonzero = 0.0; 458 if (shift_pd) { /* set shift_top=max{row_shift} */ 459 PetscInt *aai = a->i,*ddiag = a->diag; 460 shift_top = 0; 461 for (i=0; i<n; i++) { 462 d = PetscAbsScalar((a->a)[ddiag[i]]); 463 /* calculate amt of shift needed for this row */ 464 if (d<=0) { 465 row_shift = 0; 466 } else { 467 row_shift = -2*d; 468 } 469 v = a->a+aai[i]; 470 nz = aai[i+1] - aai[i]; 471 for (j=0; j<nz; j++) 472 row_shift += PetscAbsScalar(v[j]); 473 if (row_shift>shift_top) shift_top = row_shift; 474 } 475 } 476 477 sctx.shift_amount = 0; 478 sctx.shift_top = shift_top; 479 sctx.nshift = 0; 480 sctx.nshift_max = 5; 481 sctx.shift_lo = 0.; 482 sctx.shift_hi = 1.; 483 do { 484 sctx.lushift = PETSC_FALSE; 485 for (i=0; i<n; i++){ 486 nz = ai[i+1] - ai[i]; 487 ajtmp = aj + ai[i]; 488 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 489 490 /* load in initial (unfactored row) */ 491 nz = a->i[r[i]+1] - a->i[r[i]]; 492 ajtmpold = a->j + a->i[r[i]]; 493 v = a->a + a->i[r[i]]; 494 for (j=0; j<nz; j++) { 495 rtmp[ics[ajtmpold[j]]] = v[j]; 496 } 497 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 498 499 row = *ajtmp++; 500 while (row < i) { 501 pc = rtmp + row; 502 if (*pc != 0.0) { 503 pv = b->a + diag_offset[row]; 504 pj = b->j + diag_offset[row] + 1; 505 multiplier = *pc / *pv++; 506 *pc = multiplier; 507 nz = ai[row+1] - diag_offset[row] - 1; 508 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 509 PetscLogFlops(2*nz); 510 } 511 row = *ajtmp++; 512 } 513 /* finished row so stick it into b->a */ 514 pv = b->a + ai[i] ; 515 pj = b->j + ai[i] ; 516 nz = ai[i+1] - ai[i]; 517 diag = diag_offset[i] - ai[i]; 518 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 519 rs = 0.0; 520 for (j=0; j<nz; j++) { 521 pv[j] = rtmps[pj[j]]; 522 if (j != diag) rs += PetscAbsScalar(pv[j]); 523 } 524 525 sctx.rs = rs; 526 sctx.pv = pv[diag]; 527 newshift = 0; 528 ierr = PCLUFactorCheckShift(A,info,B,&sctx,&newshift);CHKERRQ(ierr); 529 if (newshift == 1){ 530 break; 531 } else if (newshift == -1){ 532 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 533 } 534 } 535 536 if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 537 /* 538 * if no shift in this attempt & shifting & started shifting & can refine, 539 * then try lower shift 540 */ 541 sctx.shift_hi = info->shift_fraction; 542 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 543 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 544 sctx.lushift = PETSC_TRUE; 545 sctx.nshift++; 546 } 547 } while (sctx.lushift); 548 549 /* invert diagonal entries for simplier triangular solves */ 550 for (i=0; i<n; i++) { 551 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 552 } 553 554 ierr = PetscFree(rtmp);CHKERRQ(ierr); 555 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 556 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 557 C->factor = FACTOR_LU; 558 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 559 C->assembled = PETSC_TRUE; 560 PetscLogFlops(C->n); 561 if (sctx.nshift){ 562 if (shift_nonzero) { 563 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 564 } else if (shift_pd) { 565 b->lu_shift_fraction = info->shift_fraction; 566 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",info->shift_fraction,shift_top,sctx.nshift); 567 } 568 } 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 574 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 575 { 576 PetscFunctionBegin; 577 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 578 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 579 PetscFunctionReturn(0); 580 } 581 582 583 /* ----------------------------------------------------------- */ 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatLUFactor_SeqAIJ" 586 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 587 { 588 PetscErrorCode ierr; 589 Mat C; 590 591 PetscFunctionBegin; 592 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 593 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 594 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 595 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 596 PetscFunctionReturn(0); 597 } 598 /* ----------------------------------------------------------- */ 599 #undef __FUNCT__ 600 #define __FUNCT__ "MatSolve_SeqAIJ" 601 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 602 { 603 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 604 IS iscol = a->col,isrow = a->row; 605 PetscErrorCode ierr; 606 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 607 PetscInt nz,*rout,*cout; 608 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 609 610 PetscFunctionBegin; 611 if (!n) PetscFunctionReturn(0); 612 613 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 614 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 615 tmp = a->solve_work; 616 617 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 618 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 619 620 /* forward solve the lower triangular */ 621 tmp[0] = b[*r++]; 622 tmps = tmp; 623 for (i=1; i<n; i++) { 624 v = aa + ai[i] ; 625 vi = aj + ai[i] ; 626 nz = a->diag[i] - ai[i]; 627 sum = b[*r++]; 628 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 629 tmp[i] = sum; 630 } 631 632 /* backward solve the upper triangular */ 633 for (i=n-1; i>=0; i--){ 634 v = aa + a->diag[i] + 1; 635 vi = aj + a->diag[i] + 1; 636 nz = ai[i+1] - a->diag[i] - 1; 637 sum = tmp[i]; 638 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 639 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 640 } 641 642 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 643 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 644 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 645 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 646 PetscLogFlops(2*a->nz - A->n); 647 PetscFunctionReturn(0); 648 } 649 650 /* ----------------------------------------------------------- */ 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 653 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 654 { 655 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 656 PetscErrorCode ierr; 657 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 658 PetscScalar *x,*b,*aa = a->a; 659 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 660 PetscInt adiag_i,i,*vi,nz,ai_i; 661 PetscScalar *v,sum; 662 #endif 663 664 PetscFunctionBegin; 665 if (!n) PetscFunctionReturn(0); 666 667 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 668 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 669 670 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 671 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 672 #else 673 /* forward solve the lower triangular */ 674 x[0] = b[0]; 675 for (i=1; i<n; i++) { 676 ai_i = ai[i]; 677 v = aa + ai_i; 678 vi = aj + ai_i; 679 nz = adiag[i] - ai_i; 680 sum = b[i]; 681 while (nz--) sum -= *v++ * x[*vi++]; 682 x[i] = sum; 683 } 684 685 /* backward solve the upper triangular */ 686 for (i=n-1; i>=0; i--){ 687 adiag_i = adiag[i]; 688 v = aa + adiag_i + 1; 689 vi = aj + adiag_i + 1; 690 nz = ai[i+1] - adiag_i - 1; 691 sum = x[i]; 692 while (nz--) sum -= *v++ * x[*vi++]; 693 x[i] = sum*aa[adiag_i]; 694 } 695 #endif 696 PetscLogFlops(2*a->nz - A->n); 697 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 698 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 699 PetscFunctionReturn(0); 700 } 701 702 #undef __FUNCT__ 703 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 704 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 705 { 706 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 707 IS iscol = a->col,isrow = a->row; 708 PetscErrorCode ierr; 709 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 710 PetscInt nz,*rout,*cout; 711 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 712 713 PetscFunctionBegin; 714 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 715 716 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 717 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 718 tmp = a->solve_work; 719 720 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 721 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 722 723 /* forward solve the lower triangular */ 724 tmp[0] = b[*r++]; 725 for (i=1; i<n; i++) { 726 v = aa + ai[i] ; 727 vi = aj + ai[i] ; 728 nz = a->diag[i] - ai[i]; 729 sum = b[*r++]; 730 while (nz--) sum -= *v++ * tmp[*vi++ ]; 731 tmp[i] = sum; 732 } 733 734 /* backward solve the upper triangular */ 735 for (i=n-1; i>=0; i--){ 736 v = aa + a->diag[i] + 1; 737 vi = aj + a->diag[i] + 1; 738 nz = ai[i+1] - a->diag[i] - 1; 739 sum = tmp[i]; 740 while (nz--) sum -= *v++ * tmp[*vi++ ]; 741 tmp[i] = sum*aa[a->diag[i]]; 742 x[*c--] += tmp[i]; 743 } 744 745 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 746 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 747 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 748 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 749 PetscLogFlops(2*a->nz); 750 751 PetscFunctionReturn(0); 752 } 753 /* -------------------------------------------------------------------*/ 754 #undef __FUNCT__ 755 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 756 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 757 { 758 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 759 IS iscol = a->col,isrow = a->row; 760 PetscErrorCode ierr; 761 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 762 PetscInt nz,*rout,*cout,*diag = a->diag; 763 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 764 765 PetscFunctionBegin; 766 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 767 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 768 tmp = a->solve_work; 769 770 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 771 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 772 773 /* copy the b into temp work space according to permutation */ 774 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 775 776 /* forward solve the U^T */ 777 for (i=0; i<n; i++) { 778 v = aa + diag[i] ; 779 vi = aj + diag[i] + 1; 780 nz = ai[i+1] - diag[i] - 1; 781 s1 = tmp[i]; 782 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 783 while (nz--) { 784 tmp[*vi++ ] -= (*v++)*s1; 785 } 786 tmp[i] = s1; 787 } 788 789 /* backward solve the L^T */ 790 for (i=n-1; i>=0; i--){ 791 v = aa + diag[i] - 1 ; 792 vi = aj + diag[i] - 1 ; 793 nz = diag[i] - ai[i]; 794 s1 = tmp[i]; 795 while (nz--) { 796 tmp[*vi-- ] -= (*v--)*s1; 797 } 798 } 799 800 /* copy tmp into x according to permutation */ 801 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 802 803 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 804 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 805 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 806 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 807 808 PetscLogFlops(2*a->nz-A->n); 809 PetscFunctionReturn(0); 810 } 811 812 #undef __FUNCT__ 813 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 814 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 815 { 816 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 817 IS iscol = a->col,isrow = a->row; 818 PetscErrorCode ierr; 819 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 820 PetscInt nz,*rout,*cout,*diag = a->diag; 821 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 822 823 PetscFunctionBegin; 824 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 825 826 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 827 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 828 tmp = a->solve_work; 829 830 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 831 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 832 833 /* copy the b into temp work space according to permutation */ 834 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 835 836 /* forward solve the U^T */ 837 for (i=0; i<n; i++) { 838 v = aa + diag[i] ; 839 vi = aj + diag[i] + 1; 840 nz = ai[i+1] - diag[i] - 1; 841 tmp[i] *= *v++; 842 while (nz--) { 843 tmp[*vi++ ] -= (*v++)*tmp[i]; 844 } 845 } 846 847 /* backward solve the L^T */ 848 for (i=n-1; i>=0; i--){ 849 v = aa + diag[i] - 1 ; 850 vi = aj + diag[i] - 1 ; 851 nz = diag[i] - ai[i]; 852 while (nz--) { 853 tmp[*vi-- ] -= (*v--)*tmp[i]; 854 } 855 } 856 857 /* copy tmp into x according to permutation */ 858 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 859 860 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 861 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 862 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 863 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 864 865 PetscLogFlops(2*a->nz); 866 PetscFunctionReturn(0); 867 } 868 /* ----------------------------------------------------------------*/ 869 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 870 871 #undef __FUNCT__ 872 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 873 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 874 { 875 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 876 IS isicol; 877 PetscErrorCode ierr; 878 PetscInt *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j; 879 PetscInt *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 880 PetscInt *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0; 881 PetscInt incrlev,nnz,i,levels,diagonal_fill; 882 PetscTruth col_identity,row_identity; 883 PetscReal f; 884 885 PetscFunctionBegin; 886 f = info->fill; 887 levels = (PetscInt)info->levels; 888 diagonal_fill = (PetscInt)info->diagonal_fill; 889 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 890 891 /* special case that simply copies fill pattern */ 892 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 893 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 894 if (!levels && row_identity && col_identity) { 895 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 896 (*fact)->factor = FACTOR_LU; 897 b = (Mat_SeqAIJ*)(*fact)->data; 898 if (!b->diag) { 899 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 900 } 901 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 902 b->row = isrow; 903 b->col = iscol; 904 b->icol = isicol; 905 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 906 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 907 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 908 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 909 PetscFunctionReturn(0); 910 } 911 912 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 913 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 914 915 /* get new row pointers */ 916 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 917 ainew[0] = 0; 918 /* don't know how many column pointers are needed so estimate */ 919 jmax = (PetscInt)(f*(ai[n]+1)); 920 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 921 /* ajfill is level of fill for each fill entry */ 922 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr); 923 /* fill is a linked list of nonzeros in active row */ 924 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 925 /* im is level for each filled value */ 926 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 927 /* dloc is location of diagonal in factor */ 928 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr); 929 dloc[0] = 0; 930 for (prow=0; prow<n; prow++) { 931 932 /* copy current row into linked list */ 933 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 934 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 935 xi = aj + ai[r[prow]]; 936 fill[n] = n; 937 fill[prow] = -1; /* marker to indicate if diagonal exists */ 938 while (nz--) { 939 fm = n; 940 idx = ic[*xi++ ]; 941 do { 942 m = fm; 943 fm = fill[m]; 944 } while (fm < idx); 945 fill[m] = idx; 946 fill[idx] = fm; 947 im[idx] = 0; 948 } 949 950 /* make sure diagonal entry is included */ 951 if (diagonal_fill && fill[prow] == -1) { 952 fm = n; 953 while (fill[fm] < prow) fm = fill[fm]; 954 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 955 fill[fm] = prow; 956 im[prow] = 0; 957 nzf++; 958 dcount++; 959 } 960 961 nzi = 0; 962 row = fill[n]; 963 while (row < prow) { 964 incrlev = im[row] + 1; 965 nz = dloc[row]; 966 xi = ajnew + ainew[row] + nz + 1; 967 flev = ajfill + ainew[row] + nz + 1; 968 nnz = ainew[row+1] - ainew[row] - nz - 1; 969 fm = row; 970 while (nnz-- > 0) { 971 idx = *xi++ ; 972 if (*flev + incrlev > levels) { 973 flev++; 974 continue; 975 } 976 do { 977 m = fm; 978 fm = fill[m]; 979 } while (fm < idx); 980 if (fm != idx) { 981 im[idx] = *flev + incrlev; 982 fill[m] = idx; 983 fill[idx] = fm; 984 fm = idx; 985 nzf++; 986 } else { 987 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 988 } 989 flev++; 990 } 991 row = fill[row]; 992 nzi++; 993 } 994 /* copy new filled row into permanent storage */ 995 ainew[prow+1] = ainew[prow] + nzf; 996 if (ainew[prow+1] > jmax) { 997 998 /* estimate how much additional space we will need */ 999 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1000 /* just double the memory each time */ 1001 /* maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1002 PetscInt maxadd = jmax; 1003 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1004 jmax += maxadd; 1005 1006 /* allocate a longer ajnew and ajfill */ 1007 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1008 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1009 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1010 ajnew = xi; 1011 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1012 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1013 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1014 ajfill = xi; 1015 reallocs++; /* count how many times we realloc */ 1016 } 1017 xi = ajnew + ainew[prow] ; 1018 flev = ajfill + ainew[prow] ; 1019 dloc[prow] = nzi; 1020 fm = fill[n]; 1021 while (nzf--) { 1022 *xi++ = fm ; 1023 *flev++ = im[fm]; 1024 fm = fill[fm]; 1025 } 1026 /* make sure row has diagonal entry */ 1027 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1028 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1029 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1030 } 1031 } 1032 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1033 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1034 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1035 ierr = PetscFree(fill);CHKERRQ(ierr); 1036 ierr = PetscFree(im);CHKERRQ(ierr); 1037 1038 { 1039 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1040 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1041 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1042 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1043 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1044 if (diagonal_fill) { 1045 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1046 } 1047 } 1048 1049 /* put together the new matrix */ 1050 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1051 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1052 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1053 PetscLogObjectParent(*fact,isicol); 1054 b = (Mat_SeqAIJ*)(*fact)->data; 1055 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1056 b->singlemalloc = PETSC_FALSE; 1057 len = (ainew[n] )*sizeof(PetscScalar); 1058 /* the next line frees the default space generated by the Create() */ 1059 ierr = PetscFree(b->a);CHKERRQ(ierr); 1060 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1061 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1062 b->j = ajnew; 1063 b->i = ainew; 1064 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1065 b->diag = dloc; 1066 b->ilen = 0; 1067 b->imax = 0; 1068 b->row = isrow; 1069 b->col = iscol; 1070 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1071 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1072 b->icol = isicol; 1073 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1074 /* In b structure: Free imax, ilen, old a, old j. 1075 Allocate dloc, solve_work, new a, new j */ 1076 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar))); 1077 b->maxnz = b->nz = ainew[n] ; 1078 (*fact)->factor = FACTOR_LU; 1079 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1080 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1081 (*fact)->info.factor_mallocs = reallocs; 1082 (*fact)->info.fill_ratio_given = f; 1083 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #include "src/mat/impls/sbaij/seq/sbaij.h" 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1090 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1091 { 1092 Mat C = *B; 1093 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1094 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1095 IS ip=b->row; 1096 PetscErrorCode ierr; 1097 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1098 PetscInt *ai=a->i,*aj=a->j; 1099 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1100 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1101 PetscReal zeropivot,shift_amount,rs,shift_nonzero; 1102 PetscTruth chshift,shift_pd; 1103 PetscInt nshift=0; 1104 1105 PetscFunctionBegin; 1106 shift_nonzero = info->shiftnz; 1107 shift_pd = info->shiftpd; 1108 zeropivot = info->zeropivot; 1109 1110 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1111 1112 /* initialization */ 1113 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1114 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1115 jl = il + mbs; 1116 rtmp = (MatScalar*)(jl + mbs); 1117 1118 shift_amount = 0; 1119 do { 1120 chshift = PETSC_FALSE; 1121 for (i=0; i<mbs; i++) { 1122 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1123 } 1124 1125 for (k = 0; k<mbs; k++){ 1126 bval = ba + bi[k]; 1127 /* initialize k-th row by the perm[k]-th row of A */ 1128 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1129 for (j = jmin; j < jmax; j++){ 1130 col = rip[aj[j]]; 1131 if (col >= k){ /* only take upper triangular entry */ 1132 rtmp[col] = aa[j]; 1133 *bval++ = 0.0; /* for in-place factorization */ 1134 } 1135 } 1136 /* shift the diagonal of the matrix */ 1137 if (nshift) rtmp[k] += shift_amount; 1138 1139 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1140 dk = rtmp[k]; 1141 i = jl[k]; /* first row to be added to k_th row */ 1142 1143 while (i < k){ 1144 nexti = jl[i]; /* next row to be added to k_th row */ 1145 1146 /* compute multiplier, update diag(k) and U(i,k) */ 1147 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1148 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1149 dk += uikdi*ba[ili]; 1150 ba[ili] = uikdi; /* -U(i,k) */ 1151 1152 /* add multiple of row i to k-th row */ 1153 jmin = ili + 1; jmax = bi[i+1]; 1154 if (jmin < jmax){ 1155 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1156 /* update il and jl for row i */ 1157 il[i] = jmin; 1158 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1159 } 1160 i = nexti; 1161 } 1162 1163 /* shift the diagonals when zero pivot is detected */ 1164 /* compute rs=sum of abs(off-diagonal) */ 1165 rs = 0.0; 1166 jmin = bi[k]+1; 1167 nz = bi[k+1] - jmin; 1168 if (nz){ 1169 bcol = bj + jmin; 1170 while (nz--){ 1171 rs += PetscAbsScalar(rtmp[*bcol++]); 1172 } 1173 } 1174 if (PetscAbsScalar(dk) <= zeropivot*rs && shift_nonzero){ 1175 /* force |diag(*B)| > zeropivot*rs */ 1176 if (!nshift){ 1177 shift_amount = shift_nonzero; 1178 } else { 1179 shift_amount *= 2.0; 1180 } 1181 chshift = PETSC_TRUE; 1182 nshift++; 1183 break; 1184 } else if (PetscRealPart(dk) <= zeropivot*rs && shift_pd){ 1185 /* calculate a shift that would make this row diagonally dominant */ 1186 shift_amount = PetscMax(rs+PetscAbs(PetscRealPart(dk)),1.1*shift_amount); 1187 chshift = PETSC_TRUE; 1188 /* Unlike in the ILU case there is no exit condition on nshift: 1189 we increase the shift until it converges. There is no guarantee that 1190 this algorithm converges faster or slower, or is better or worse 1191 than the ILU algorithm. */ 1192 nshift++; 1193 break; 1194 } else if (PetscAbsScalar(dk) <= zeropivot*rs){ 1195 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1196 } 1197 1198 /* copy data into U(k,:) */ 1199 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1200 jmin = bi[k]+1; jmax = bi[k+1]; 1201 if (jmin < jmax) { 1202 for (j=jmin; j<jmax; j++){ 1203 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1204 } 1205 /* add the k-th row into il and jl */ 1206 il[k] = jmin; 1207 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1208 } 1209 } 1210 } while (chshift); 1211 ierr = PetscFree(il);CHKERRQ(ierr); 1212 1213 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1214 C->factor = FACTOR_CHOLESKY; 1215 C->assembled = PETSC_TRUE; 1216 C->preallocated = PETSC_TRUE; 1217 PetscLogFlops(C->m); 1218 if (nshift){ 1219 if (shift_nonzero) { 1220 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",nshift,shift_amount); 1221 } else if (shift_pd) { 1222 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g\n",nshift,shift_amount); 1223 } 1224 } 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #include "petscbt.h" 1229 #include "src/mat/utils/freespace.h" 1230 #undef __FUNCT__ 1231 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1232 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1233 { 1234 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1235 Mat_SeqSBAIJ *b; 1236 Mat B; 1237 PetscErrorCode ierr; 1238 PetscTruth perm_identity; 1239 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1240 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1241 PetscInt nlnk,*lnk,*lnk_lvl,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1242 PetscReal fill=info->fill,levels=info->levels; 1243 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1244 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1245 PetscBT lnkbt; 1246 1247 PetscFunctionBegin; 1248 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1249 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1250 1251 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1252 ui[0] = 0; 1253 1254 /* special case that simply copies fill pattern */ 1255 if (!levels && perm_identity) { 1256 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1257 for (i=0; i<am; i++) { 1258 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1259 } 1260 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1261 cols = uj; 1262 for (i=0; i<am; i++) { 1263 aj = a->j + a->diag[i]; 1264 ncols = ui[i+1] - ui[i]; 1265 for (j=0; j<ncols; j++) *cols++ = *aj++; 1266 } 1267 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1268 /* initialization */ 1269 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 1270 1271 /* jl: linked list for storing indices of the pivot rows 1272 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1273 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1274 il = jl + am; 1275 uj_ptr = (PetscInt**)(il + am); 1276 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1277 for (i=0; i<am; i++){ 1278 jl[i] = am; il[i] = 0; 1279 } 1280 1281 /* create and initialize a linked list for storing column indices of the active row k */ 1282 nlnk = am + 1; 1283 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1284 1285 /* initial FreeSpace size is fill*(ai[am]+1) */ 1286 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1287 current_space = free_space; 1288 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1289 current_space_lvl = free_space_lvl; 1290 1291 for (k=0; k<am; k++){ /* for each active row k */ 1292 /* initialize lnk by the column indices of row rip[k] of A */ 1293 nzk = 0; 1294 ncols = ai[rip[k]+1] - ai[rip[k]]; 1295 ncols_upper = 0; 1296 cols = cols_lvl + am; 1297 for (j=0; j<ncols; j++){ 1298 i = rip[*(aj + ai[rip[k]] + j)]; 1299 if (i >= k){ /* only take upper triangular entry */ 1300 cols[ncols_upper] = i; 1301 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 1302 ncols_upper++; 1303 } 1304 } 1305 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1306 nzk += nlnk; 1307 1308 /* update lnk by computing fill-in for each pivot row to be merged in */ 1309 prow = jl[k]; /* 1st pivot row */ 1310 1311 while (prow < k){ 1312 nextprow = jl[prow]; 1313 1314 /* merge prow into k-th row */ 1315 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1316 jmax = ui[prow+1]; 1317 ncols = jmax-jmin; 1318 i = jmin - ui[prow]; 1319 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1320 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1321 ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1322 nzk += nlnk; 1323 1324 /* update il and jl for prow */ 1325 if (jmin < jmax){ 1326 il[prow] = jmin; 1327 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1328 } 1329 prow = nextprow; 1330 } 1331 1332 /* if free space is not available, make more free space */ 1333 if (current_space->local_remaining<nzk) { 1334 i = am - k + 1; /* num of unfactored rows */ 1335 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1336 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1337 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1338 reallocs++; 1339 } 1340 1341 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1342 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1343 1344 /* add the k-th row into il and jl */ 1345 if (nzk > 1){ 1346 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1347 jl[k] = jl[i]; jl[i] = k; 1348 il[k] = ui[k] + 1; 1349 } 1350 uj_ptr[k] = current_space->array; 1351 uj_lvl_ptr[k] = current_space_lvl->array; 1352 1353 current_space->array += nzk; 1354 current_space->local_used += nzk; 1355 current_space->local_remaining -= nzk; 1356 1357 current_space_lvl->array += nzk; 1358 current_space_lvl->local_used += nzk; 1359 current_space_lvl->local_remaining -= nzk; 1360 1361 ui[k+1] = ui[k] + nzk; 1362 } 1363 1364 if (ai[am] != 0) { 1365 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1366 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1367 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1368 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1369 } else { 1370 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1371 } 1372 1373 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1374 ierr = PetscFree(jl);CHKERRQ(ierr); 1375 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1376 1377 /* destroy list of free space and other temporary array(s) */ 1378 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1379 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1380 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1381 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1382 1383 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1384 1385 /* put together the new matrix in MATSEQSBAIJ format */ 1386 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1387 B = *fact; 1388 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1389 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1390 1391 b = (Mat_SeqSBAIJ*)B->data; 1392 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1393 b->singlemalloc = PETSC_FALSE; 1394 /* the next line frees the default space generated by the Create() */ 1395 ierr = PetscFree(b->a);CHKERRQ(ierr); 1396 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1397 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1398 b->j = uj; 1399 b->i = ui; 1400 b->diag = 0; 1401 b->ilen = 0; 1402 b->imax = 0; 1403 b->row = perm; 1404 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1405 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1406 b->icol = perm; 1407 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1408 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1409 PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 1410 b->maxnz = b->nz = ui[am]; 1411 1412 B->factor = FACTOR_CHOLESKY; 1413 B->info.factor_mallocs = reallocs; 1414 B->info.fill_ratio_given = fill; 1415 if (ai[am] != 0) { 1416 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1417 } else { 1418 B->info.fill_ratio_needed = 0.0; 1419 } 1420 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1421 if (perm_identity){ 1422 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1423 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1430 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1431 { 1432 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1433 Mat_SeqSBAIJ *b; 1434 Mat B; 1435 PetscErrorCode ierr; 1436 PetscTruth perm_identity; 1437 PetscReal fill = info->fill; 1438 PetscInt *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1439 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1440 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1441 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1442 PetscBT lnkbt; 1443 IS iperm; 1444 1445 PetscFunctionBegin; 1446 /* check whether perm is the identity mapping */ 1447 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1448 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1449 1450 if (!perm_identity){ 1451 /* check if perm is symmetric! */ 1452 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1453 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1454 for (i=0; i<mbs; i++) { 1455 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1456 } 1457 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1458 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1459 } 1460 1461 /* initialization */ 1462 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1463 ui[0] = 0; 1464 1465 /* jl: linked list for storing indices of the pivot rows 1466 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 1467 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1468 il = jl + mbs; 1469 cols = il + mbs; 1470 ui_ptr = (PetscInt**)(cols + mbs); 1471 for (i=0; i<mbs; i++){ 1472 jl[i] = mbs; il[i] = 0; 1473 } 1474 1475 /* create and initialize a linked list for storing column indices of the active row k */ 1476 nlnk = mbs + 1; 1477 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1478 1479 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 1480 ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 1481 current_space = free_space; 1482 1483 for (k=0; k<mbs; k++){ /* for each active row k */ 1484 /* initialize lnk by the column indices of row rip[k] of A */ 1485 nzk = 0; 1486 ncols = ai[rip[k]+1] - ai[rip[k]]; 1487 ncols_upper = 0; 1488 for (j=0; j<ncols; j++){ 1489 i = rip[*(aj + ai[rip[k]] + j)]; 1490 if (i >= k){ /* only take upper triangular entry */ 1491 cols[ncols_upper] = i; 1492 ncols_upper++; 1493 } 1494 } 1495 ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1496 nzk += nlnk; 1497 1498 /* update lnk by computing fill-in for each pivot row to be merged in */ 1499 prow = jl[k]; /* 1st pivot row */ 1500 1501 while (prow < k){ 1502 nextprow = jl[prow]; 1503 /* merge prow into k-th row */ 1504 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 1505 jmax = ui[prow+1]; 1506 ncols = jmax-jmin; 1507 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 1508 ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1509 nzk += nlnk; 1510 1511 /* update il and jl for prow */ 1512 if (jmin < jmax){ 1513 il[prow] = jmin; 1514 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1515 } 1516 prow = nextprow; 1517 } 1518 1519 /* if free space is not available, make more free space */ 1520 if (current_space->local_remaining<nzk) { 1521 i = mbs - k + 1; /* num of unfactored rows */ 1522 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1523 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1524 reallocs++; 1525 } 1526 1527 /* copy data into free space, then initialize lnk */ 1528 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1529 1530 /* add the k-th row into il and jl */ 1531 if (nzk-1 > 0){ 1532 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1533 jl[k] = jl[i]; jl[i] = k; 1534 il[k] = ui[k] + 1; 1535 } 1536 ui_ptr[k] = current_space->array; 1537 current_space->array += nzk; 1538 current_space->local_used += nzk; 1539 current_space->local_remaining -= nzk; 1540 1541 ui[k+1] = ui[k] + nzk; 1542 } 1543 1544 if (ai[mbs] != 0) { 1545 PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]); 1546 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1547 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1548 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1549 } else { 1550 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1551 } 1552 1553 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1554 ierr = PetscFree(jl);CHKERRQ(ierr); 1555 1556 /* destroy list of free space and other temporary array(s) */ 1557 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1558 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1559 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1560 1561 /* put together the new matrix in MATSEQSBAIJ format */ 1562 ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr); 1563 B = *fact; 1564 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1565 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1566 1567 b = (Mat_SeqSBAIJ*)B->data; 1568 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1569 b->singlemalloc = PETSC_FALSE; 1570 /* the next line frees the default space generated by the Create() */ 1571 ierr = PetscFree(b->a);CHKERRQ(ierr); 1572 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1573 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1574 b->j = uj; 1575 b->i = ui; 1576 b->diag = 0; 1577 b->ilen = 0; 1578 b->imax = 0; 1579 b->row = perm; 1580 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1581 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1582 b->icol = perm; 1583 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1584 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1585 PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 1586 b->maxnz = b->nz = ui[mbs]; 1587 1588 B->factor = FACTOR_CHOLESKY; 1589 B->info.factor_mallocs = reallocs; 1590 B->info.fill_ratio_given = fill; 1591 if (ai[mbs] != 0) { 1592 B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 1593 } else { 1594 B->info.fill_ratio_needed = 0.0; 1595 } 1596 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1597 if (perm_identity){ 1598 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1599 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1600 } 1601 PetscFunctionReturn(0); 1602 } 1603