1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 9 { 10 PetscFunctionBegin; 11 12 SETERRQ(PETSC_ERR_SUP,"Code not written"); 13 #if !defined(PETSC_USE_DEBUG) 14 PetscFunctionReturn(0); 15 #endif 16 } 17 18 19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 21 22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 28 /* ------------------------------------------------------------ 29 30 This interface was contribed by Tony Caola 31 32 This routine is an interface to the pivoting drop-tolerance 33 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 34 SPARSEKIT2. 35 36 The SPARSEKIT2 routines used here are covered by the GNU 37 copyright; see the file gnu in this directory. 38 39 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 40 help in getting this routine ironed out. 41 42 The major drawback to this routine is that if info->fill is 43 not large enough it fails rather than allocating more space; 44 this can be fixed by hacking/improving the f2c version of 45 Yousef Saad's code. 46 47 ------------------------------------------------------------ 48 */ 49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 50 { 51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 52 PetscFunctionBegin; 53 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 54 You can obtain the drop tolerance routines by installing PETSc from\n\ 55 www.mcs.anl.gov/petsc\n"); 56 #else 57 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 58 IS iscolf,isicol,isirow; 59 PetscTruth reorder; 60 PetscErrorCode ierr,sierr; 61 PetscInt *c,*r,*ic,i,n = A->m; 62 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63 PetscInt *ordcol,*iwk,*iperm,*jw; 64 PetscInt jmax,lfill,job,*o_i,*o_j; 65 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66 PetscReal af; 67 68 PetscFunctionBegin; 69 70 if (info->dt == PETSC_DEFAULT) info->dt = .005; 71 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 72 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 73 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 74 lfill = (PetscInt)(info->dtcount/2.0); 75 jmax = (PetscInt)(info->fill*a->nz); 76 77 78 /* ------------------------------------------------------------ 79 If reorder=.TRUE., then the original matrix has to be 80 reordered to reflect the user selected ordering scheme, and 81 then de-reordered so it is in it's original format. 82 Because Saad's dperm() is NOT in place, we have to copy 83 the original matrix and allocate more storage. . . 84 ------------------------------------------------------------ 85 */ 86 87 /* set reorder to true if either isrow or iscol is not identity */ 88 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 89 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 90 reorder = PetscNot(reorder); 91 92 93 /* storage for ilu factor */ 94 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 95 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 97 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 for (i=old_i[0];i<old_i[n];i++) { 104 old_j[i]++; 105 } 106 for(i=0;i<n+1;i++) { 107 old_i[i]++; 108 }; 109 110 111 if (reorder) { 112 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 113 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 114 for(i=0;i<n;i++) { 115 r[i] = r[i]+1; 116 c[i] = c[i]+1; 117 } 118 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 119 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 121 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 122 for (i=0;i<n;i++) { 123 r[i] = r[i]-1; 124 c[i] = c[i]-1; 125 } 126 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 127 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 128 o_a = old_a2; 129 o_j = old_j2; 130 o_i = old_i2; 131 } else { 132 o_a = old_a; 133 o_j = old_j; 134 o_i = old_i; 135 } 136 137 /* ------------------------------------------------------------ 138 Call Saad's ilutp() routine to generate the factorization 139 ------------------------------------------------------------ 140 */ 141 142 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 144 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 145 146 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 147 if (sierr) { 148 switch (sierr) { 149 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 150 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 152 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 153 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 154 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 155 } 156 } 157 158 ierr = PetscFree(w);CHKERRQ(ierr); 159 ierr = PetscFree(jw);CHKERRQ(ierr); 160 161 /* ------------------------------------------------------------ 162 Saad's routine gives the result in Modified Sparse Row (msr) 163 Convert to Compressed Sparse Row format (csr) 164 ------------------------------------------------------------ 165 */ 166 167 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 168 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 169 170 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 171 172 ierr = PetscFree(iwk);CHKERRQ(ierr); 173 ierr = PetscFree(wk);CHKERRQ(ierr); 174 175 if (reorder) { 176 ierr = PetscFree(old_a2);CHKERRQ(ierr); 177 ierr = PetscFree(old_j2);CHKERRQ(ierr); 178 ierr = PetscFree(old_i2);CHKERRQ(ierr); 179 } else { 180 /* fix permutation of old_j that the factorization introduced */ 181 for (i=old_i[0]; i<old_i[n]; i++) { 182 old_j[i-1] = iperm[old_j[i-1]-1]; 183 } 184 } 185 186 /* get rid of the shift to indices starting at 1 */ 187 for (i=0; i<n+1; i++) { 188 old_i[i]--; 189 } 190 for (i=old_i[0];i<old_i[n];i++) { 191 old_j[i]--; 192 } 193 194 /* Make the factored matrix 0-based */ 195 for (i=0; i<n+1; i++) { 196 new_i[i]--; 197 } 198 for (i=new_i[0];i<new_i[n];i++) { 199 new_j[i]--; 200 } 201 202 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 203 /*-- permute the right-hand-side and solution vectors --*/ 204 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 205 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 206 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 207 for(i=0; i<n; i++) { 208 ordcol[i] = ic[iperm[i]-1]; 209 }; 210 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 211 ierr = ISDestroy(isicol);CHKERRQ(ierr); 212 213 ierr = PetscFree(iperm);CHKERRQ(ierr); 214 215 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 216 ierr = PetscFree(ordcol);CHKERRQ(ierr); 217 218 /*----- put together the new matrix -----*/ 219 220 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 221 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 222 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 223 (*fact)->factor = FACTOR_LU; 224 (*fact)->assembled = PETSC_TRUE; 225 226 b = (Mat_SeqAIJ*)(*fact)->data; 227 ierr = PetscFree(b->imax);CHKERRQ(ierr); 228 b->sorted = PETSC_FALSE; 229 b->singlemalloc = PETSC_FALSE; 230 /* the next line frees the default space generated by the MatCreate() */ 231 ierr = PetscFree(b->a);CHKERRQ(ierr); 232 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 247 248 /* check out for identical nodes. If found, use inode functions */ 249 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 250 251 af = ((double)b->nz)/((double)a->nz) + .001; 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 /* 262 Factorization code for AIJ format. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 267 { 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 269 IS isicol; 270 PetscErrorCode ierr; 271 PetscInt *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j; 272 PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 273 PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im; 274 PetscReal f; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 284 ainew[0] = 0; 285 /* don't know how many column pointers are needed so estimate */ 286 f = info->fill; 287 jmax = (PetscInt)(f*ai[n]+1); 288 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 289 /* fill is a linked list of nonzeros in active row */ 290 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 291 im = fill + n + 1; 292 /* idnew is location of diagonal in factor */ 293 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr); 294 idnew[0] = 0; 295 296 for (i=0; i<n; i++) { 297 /* first copy previous fill into linked list */ 298 nnz = nz = ai[r[i]+1] - ai[r[i]]; 299 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 300 ajtmp = aj + ai[r[i]]; 301 fill[n] = n; 302 while (nz--) { 303 fm = n; 304 idx = ic[*ajtmp++]; 305 do { 306 m = fm; 307 fm = fill[m]; 308 } while (fm < idx); 309 fill[m] = idx; 310 fill[idx] = fm; 311 } 312 row = fill[n]; 313 while (row < i) { 314 ajtmp = ajnew + idnew[row] + 1; 315 nzbd = 1 + idnew[row] - ainew[row]; 316 nz = im[row] - nzbd; 317 fm = row; 318 while (nz-- > 0) { 319 idx = *ajtmp++ ; 320 nzbd++; 321 if (idx == i) im[row] = nzbd; 322 do { 323 m = fm; 324 fm = fill[m]; 325 } while (fm < idx); 326 if (fm != idx) { 327 fill[m] = idx; 328 fill[idx] = fm; 329 fm = idx; 330 nnz++; 331 } 332 } 333 row = fill[row]; 334 } 335 /* copy new filled row into permanent storage */ 336 ainew[i+1] = ainew[i] + nnz; 337 if (ainew[i+1] > jmax) { 338 /* estimate how much additional space we will need */ 339 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 340 /* just double the memory each time */ 341 PetscInt maxadd = jmax; 342 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 343 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 344 jmax += maxadd; 345 346 /* allocate a longer ajnew */ 347 ierr = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 348 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr); 349 ierr = PetscFree(ajnew);CHKERRQ(ierr); 350 ajnew = ajtmp; 351 reallocs++; /* count how many times we realloc */ 352 } 353 ajtmp = ajnew + ainew[i]; 354 fm = fill[n]; 355 nzi = 0; 356 im[i] = nnz; 357 while (nnz--) { 358 if (fm < i) nzi++; 359 *ajtmp++ = fm ; 360 fm = fill[fm]; 361 } 362 idnew[i] = ainew[i] + nzi; 363 } 364 if (ai[n] != 0) { 365 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 366 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 370 } else { 371 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 372 } 373 374 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 375 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 376 377 ierr = PetscFree(fill);CHKERRQ(ierr); 378 379 /* put together the new matrix */ 380 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 381 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 382 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 383 PetscLogObjectParent(*B,isicol); 384 b = (Mat_SeqAIJ*)(*B)->data; 385 ierr = PetscFree(b->imax);CHKERRQ(ierr); 386 b->singlemalloc = PETSC_FALSE; 387 /* the next line frees the default space generated by the Create() */ 388 ierr = PetscFree(b->a);CHKERRQ(ierr); 389 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 390 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 391 b->j = ajnew; 392 b->i = ainew; 393 b->diag = idnew; 394 b->ilen = 0; 395 b->imax = 0; 396 b->row = isrow; 397 b->col = iscol; 398 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 399 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 400 b->icol = isicol; 401 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 402 /* In b structure: Free imax, ilen, old a, old j. 403 Allocate idnew, solve_work, new a, new j */ 404 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar))); 405 b->maxnz = b->nz = ainew[n] ; 406 407 (*B)->factor = FACTOR_LU; 408 (*B)->info.factor_mallocs = reallocs; 409 (*B)->info.fill_ratio_given = f; 410 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 411 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 412 413 if (ai[n] != 0) { 414 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 415 } else { 416 (*B)->info.fill_ratio_needed = 0.0; 417 } 418 PetscFunctionReturn(0); 419 } 420 /* ----------------------------------------------------------- */ 421 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 425 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 426 { 427 Mat C=*B; 428 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 429 IS isrow = b->row,isicol = b->icol; 430 PetscErrorCode ierr; 431 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 432 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 433 PetscInt *diag_offset = b->diag,diag,*pj; 434 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 435 PetscReal zeropivot,rs,d,shift_nz; 436 PetscReal row_shift,shift_top=0.; 437 PetscTruth shift_pd; 438 LUShift_Ctx sctx; 439 PetscInt newshift; 440 441 PetscFunctionBegin; 442 shift_nz = info->shiftnz; 443 shift_pd = info->shiftpd; 444 zeropivot = info->zeropivot; 445 446 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 447 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 448 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 449 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 450 rtmps = rtmp; ics = ic; 451 452 if (!a->diag) { 453 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 454 } 455 /* if both shift schemes are chosen by user, only use shift_pd */ 456 if (shift_pd && shift_nz) shift_nz = 0.0; 457 if (shift_pd) { /* set shift_top=max{row_shift} */ 458 PetscInt *aai = a->i,*ddiag = a->diag; 459 shift_top = 0; 460 for (i=0; i<n; i++) { 461 d = PetscAbsScalar((a->a)[ddiag[i]]); 462 /* calculate amt of shift needed for this row */ 463 if (d<=0) { 464 row_shift = 0; 465 } else { 466 row_shift = -2*d; 467 } 468 v = a->a+aai[i]; 469 nz = aai[i+1] - aai[i]; 470 for (j=0; j<nz; j++) 471 row_shift += PetscAbsScalar(v[j]); 472 if (row_shift>shift_top) shift_top = row_shift; 473 } 474 } 475 476 sctx.shift_amount = 0; 477 sctx.shift_top = shift_top; 478 sctx.nshift = 0; 479 sctx.nshift_max = 5; 480 sctx.shift_lo = 0.; 481 sctx.shift_hi = 1.; 482 do { 483 sctx.lushift = PETSC_FALSE; 484 for (i=0; i<n; i++){ 485 nz = bi[i+1] - bi[i]; 486 bjtmp = bj + bi[i]; 487 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 488 489 /* load in initial (unfactored row) */ 490 nz = a->i[r[i]+1] - a->i[r[i]]; 491 ajtmp = a->j + a->i[r[i]]; 492 v = a->a + a->i[r[i]]; 493 for (j=0; j<nz; j++) { 494 rtmp[ics[ajtmp[j]]] = v[j]; 495 } 496 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 497 498 row = *bjtmp++; 499 while (row < i) { 500 pc = rtmp + row; 501 if (*pc != 0.0) { 502 pv = b->a + diag_offset[row]; 503 pj = b->j + diag_offset[row] + 1; 504 multiplier = *pc / *pv++; 505 *pc = multiplier; 506 nz = bi[row+1] - diag_offset[row] - 1; 507 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 508 PetscLogFlops(2*nz); 509 } 510 row = *bjtmp++; 511 } 512 /* finished row so stick it into b->a */ 513 pv = b->a + bi[i] ; 514 pj = b->j + bi[i] ; 515 nz = bi[i+1] - bi[i]; 516 diag = diag_offset[i] - bi[i]; 517 rs = 0.0; 518 for (j=0; j<nz; j++) { 519 pv[j] = rtmps[pj[j]]; 520 if (j != diag) rs += PetscAbsScalar(pv[j]); 521 } 522 523 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 524 sctx.rs = rs; 525 sctx.pv = pv[diag]; 526 ierr = Mat_LUCheckShift(info,&sctx,&newshift);CHKERRQ(ierr); 527 if (newshift == 1){ 528 break; /* sctx.shift_amount is updated */ 529 } else if (newshift == -1){ 530 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 531 } 532 } 533 534 if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 535 /* 536 * if no shift in this attempt & shifting & started shifting & can refine, 537 * then try lower shift 538 */ 539 sctx.shift_hi = info->shift_fraction; 540 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 541 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 542 sctx.lushift = PETSC_TRUE; 543 sctx.nshift++; 544 } 545 } while (sctx.lushift); 546 547 /* invert diagonal entries for simplier triangular solves */ 548 for (i=0; i<n; i++) { 549 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 550 } 551 552 ierr = PetscFree(rtmp);CHKERRQ(ierr); 553 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 554 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 555 C->factor = FACTOR_LU; 556 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 557 C->assembled = PETSC_TRUE; 558 PetscLogFlops(C->n); 559 if (sctx.nshift){ 560 if (shift_nz) { 561 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 562 } else if (shift_pd) { 563 b->lu_shift_fraction = info->shift_fraction; 564 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",info->shift_fraction,shift_top,sctx.nshift); 565 } 566 } 567 PetscFunctionReturn(0); 568 } 569 570 #undef __FUNCT__ 571 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 572 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 573 { 574 PetscFunctionBegin; 575 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 576 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 577 PetscFunctionReturn(0); 578 } 579 580 581 /* ----------------------------------------------------------- */ 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatLUFactor_SeqAIJ" 584 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 585 { 586 PetscErrorCode ierr; 587 Mat C; 588 589 PetscFunctionBegin; 590 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 591 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 592 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 593 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 594 PetscFunctionReturn(0); 595 } 596 /* ----------------------------------------------------------- */ 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatSolve_SeqAIJ" 599 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 600 { 601 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 602 IS iscol = a->col,isrow = a->row; 603 PetscErrorCode ierr; 604 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 605 PetscInt nz,*rout,*cout; 606 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 607 608 PetscFunctionBegin; 609 if (!n) PetscFunctionReturn(0); 610 611 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 612 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 613 tmp = a->solve_work; 614 615 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 616 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 617 618 /* forward solve the lower triangular */ 619 tmp[0] = b[*r++]; 620 tmps = tmp; 621 for (i=1; i<n; i++) { 622 v = aa + ai[i] ; 623 vi = aj + ai[i] ; 624 nz = a->diag[i] - ai[i]; 625 sum = b[*r++]; 626 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 627 tmp[i] = sum; 628 } 629 630 /* backward solve the upper triangular */ 631 for (i=n-1; i>=0; i--){ 632 v = aa + a->diag[i] + 1; 633 vi = aj + a->diag[i] + 1; 634 nz = ai[i+1] - a->diag[i] - 1; 635 sum = tmp[i]; 636 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 637 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 638 } 639 640 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 641 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 642 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 643 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 644 PetscLogFlops(2*a->nz - A->n); 645 PetscFunctionReturn(0); 646 } 647 648 /* ----------------------------------------------------------- */ 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 651 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 652 { 653 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 654 PetscErrorCode ierr; 655 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 656 PetscScalar *x,*b,*aa = a->a; 657 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 658 PetscInt adiag_i,i,*vi,nz,ai_i; 659 PetscScalar *v,sum; 660 #endif 661 662 PetscFunctionBegin; 663 if (!n) PetscFunctionReturn(0); 664 665 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 666 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 667 668 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 669 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 670 #else 671 /* forward solve the lower triangular */ 672 x[0] = b[0]; 673 for (i=1; i<n; i++) { 674 ai_i = ai[i]; 675 v = aa + ai_i; 676 vi = aj + ai_i; 677 nz = adiag[i] - ai_i; 678 sum = b[i]; 679 while (nz--) sum -= *v++ * x[*vi++]; 680 x[i] = sum; 681 } 682 683 /* backward solve the upper triangular */ 684 for (i=n-1; i>=0; i--){ 685 adiag_i = adiag[i]; 686 v = aa + adiag_i + 1; 687 vi = aj + adiag_i + 1; 688 nz = ai[i+1] - adiag_i - 1; 689 sum = x[i]; 690 while (nz--) sum -= *v++ * x[*vi++]; 691 x[i] = sum*aa[adiag_i]; 692 } 693 #endif 694 PetscLogFlops(2*a->nz - A->n); 695 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 696 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 697 PetscFunctionReturn(0); 698 } 699 700 #undef __FUNCT__ 701 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 702 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 703 { 704 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 705 IS iscol = a->col,isrow = a->row; 706 PetscErrorCode ierr; 707 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 708 PetscInt nz,*rout,*cout; 709 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 710 711 PetscFunctionBegin; 712 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 713 714 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 715 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 716 tmp = a->solve_work; 717 718 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 719 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 720 721 /* forward solve the lower triangular */ 722 tmp[0] = b[*r++]; 723 for (i=1; i<n; i++) { 724 v = aa + ai[i] ; 725 vi = aj + ai[i] ; 726 nz = a->diag[i] - ai[i]; 727 sum = b[*r++]; 728 while (nz--) sum -= *v++ * tmp[*vi++ ]; 729 tmp[i] = sum; 730 } 731 732 /* backward solve the upper triangular */ 733 for (i=n-1; i>=0; i--){ 734 v = aa + a->diag[i] + 1; 735 vi = aj + a->diag[i] + 1; 736 nz = ai[i+1] - a->diag[i] - 1; 737 sum = tmp[i]; 738 while (nz--) sum -= *v++ * tmp[*vi++ ]; 739 tmp[i] = sum*aa[a->diag[i]]; 740 x[*c--] += tmp[i]; 741 } 742 743 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 744 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 745 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 746 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 747 PetscLogFlops(2*a->nz); 748 749 PetscFunctionReturn(0); 750 } 751 /* -------------------------------------------------------------------*/ 752 #undef __FUNCT__ 753 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 754 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 755 { 756 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 757 IS iscol = a->col,isrow = a->row; 758 PetscErrorCode ierr; 759 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 760 PetscInt nz,*rout,*cout,*diag = a->diag; 761 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 762 763 PetscFunctionBegin; 764 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 765 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 766 tmp = a->solve_work; 767 768 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 769 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 770 771 /* copy the b into temp work space according to permutation */ 772 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 773 774 /* forward solve the U^T */ 775 for (i=0; i<n; i++) { 776 v = aa + diag[i] ; 777 vi = aj + diag[i] + 1; 778 nz = ai[i+1] - diag[i] - 1; 779 s1 = tmp[i]; 780 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 781 while (nz--) { 782 tmp[*vi++ ] -= (*v++)*s1; 783 } 784 tmp[i] = s1; 785 } 786 787 /* backward solve the L^T */ 788 for (i=n-1; i>=0; i--){ 789 v = aa + diag[i] - 1 ; 790 vi = aj + diag[i] - 1 ; 791 nz = diag[i] - ai[i]; 792 s1 = tmp[i]; 793 while (nz--) { 794 tmp[*vi-- ] -= (*v--)*s1; 795 } 796 } 797 798 /* copy tmp into x according to permutation */ 799 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 800 801 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 802 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 803 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 804 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 805 806 PetscLogFlops(2*a->nz-A->n); 807 PetscFunctionReturn(0); 808 } 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 812 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 813 { 814 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 815 IS iscol = a->col,isrow = a->row; 816 PetscErrorCode ierr; 817 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 818 PetscInt nz,*rout,*cout,*diag = a->diag; 819 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 820 821 PetscFunctionBegin; 822 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 823 824 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 825 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 826 tmp = a->solve_work; 827 828 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 829 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 830 831 /* copy the b into temp work space according to permutation */ 832 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 833 834 /* forward solve the U^T */ 835 for (i=0; i<n; i++) { 836 v = aa + diag[i] ; 837 vi = aj + diag[i] + 1; 838 nz = ai[i+1] - diag[i] - 1; 839 tmp[i] *= *v++; 840 while (nz--) { 841 tmp[*vi++ ] -= (*v++)*tmp[i]; 842 } 843 } 844 845 /* backward solve the L^T */ 846 for (i=n-1; i>=0; i--){ 847 v = aa + diag[i] - 1 ; 848 vi = aj + diag[i] - 1 ; 849 nz = diag[i] - ai[i]; 850 while (nz--) { 851 tmp[*vi-- ] -= (*v--)*tmp[i]; 852 } 853 } 854 855 /* copy tmp into x according to permutation */ 856 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 857 858 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 859 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 860 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 861 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 862 863 PetscLogFlops(2*a->nz); 864 PetscFunctionReturn(0); 865 } 866 /* ----------------------------------------------------------------*/ 867 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 868 869 #undef __FUNCT__ 870 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 871 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 872 { 873 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 874 IS isicol; 875 PetscErrorCode ierr; 876 PetscInt *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j; 877 PetscInt *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 878 PetscInt *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0; 879 PetscInt incrlev,nnz,i,levels,diagonal_fill; 880 PetscTruth col_identity,row_identity; 881 PetscReal f; 882 883 PetscFunctionBegin; 884 f = info->fill; 885 levels = (PetscInt)info->levels; 886 diagonal_fill = (PetscInt)info->diagonal_fill; 887 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 888 889 /* special case that simply copies fill pattern */ 890 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 891 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 892 if (!levels && row_identity && col_identity) { 893 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 894 (*fact)->factor = FACTOR_LU; 895 b = (Mat_SeqAIJ*)(*fact)->data; 896 if (!b->diag) { 897 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 898 } 899 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 900 b->row = isrow; 901 b->col = iscol; 902 b->icol = isicol; 903 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 904 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 905 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 906 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 907 PetscFunctionReturn(0); 908 } 909 910 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 911 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 912 913 /* get new row pointers */ 914 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 915 ainew[0] = 0; 916 /* don't know how many column pointers are needed so estimate */ 917 jmax = (PetscInt)(f*(ai[n]+1)); 918 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 919 /* ajfill is level of fill for each fill entry */ 920 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr); 921 /* fill is a linked list of nonzeros in active row */ 922 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 923 /* im is level for each filled value */ 924 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 925 /* dloc is location of diagonal in factor */ 926 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr); 927 dloc[0] = 0; 928 for (prow=0; prow<n; prow++) { 929 930 /* copy current row into linked list */ 931 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 932 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 933 xi = aj + ai[r[prow]]; 934 fill[n] = n; 935 fill[prow] = -1; /* marker to indicate if diagonal exists */ 936 while (nz--) { 937 fm = n; 938 idx = ic[*xi++ ]; 939 do { 940 m = fm; 941 fm = fill[m]; 942 } while (fm < idx); 943 fill[m] = idx; 944 fill[idx] = fm; 945 im[idx] = 0; 946 } 947 948 /* make sure diagonal entry is included */ 949 if (diagonal_fill && fill[prow] == -1) { 950 fm = n; 951 while (fill[fm] < prow) fm = fill[fm]; 952 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 953 fill[fm] = prow; 954 im[prow] = 0; 955 nzf++; 956 dcount++; 957 } 958 959 nzi = 0; 960 row = fill[n]; 961 while (row < prow) { 962 incrlev = im[row] + 1; 963 nz = dloc[row]; 964 xi = ajnew + ainew[row] + nz + 1; 965 flev = ajfill + ainew[row] + nz + 1; 966 nnz = ainew[row+1] - ainew[row] - nz - 1; 967 fm = row; 968 while (nnz-- > 0) { 969 idx = *xi++ ; 970 if (*flev + incrlev > levels) { 971 flev++; 972 continue; 973 } 974 do { 975 m = fm; 976 fm = fill[m]; 977 } while (fm < idx); 978 if (fm != idx) { 979 im[idx] = *flev + incrlev; 980 fill[m] = idx; 981 fill[idx] = fm; 982 fm = idx; 983 nzf++; 984 } else { 985 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 986 } 987 flev++; 988 } 989 row = fill[row]; 990 nzi++; 991 } 992 /* copy new filled row into permanent storage */ 993 ainew[prow+1] = ainew[prow] + nzf; 994 if (ainew[prow+1] > jmax) { 995 996 /* estimate how much additional space we will need */ 997 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 998 /* just double the memory each time */ 999 /* maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1000 PetscInt maxadd = jmax; 1001 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1002 jmax += maxadd; 1003 1004 /* allocate a longer ajnew and ajfill */ 1005 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1006 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1007 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1008 ajnew = xi; 1009 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1010 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1011 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1012 ajfill = xi; 1013 reallocs++; /* count how many times we realloc */ 1014 } 1015 xi = ajnew + ainew[prow] ; 1016 flev = ajfill + ainew[prow] ; 1017 dloc[prow] = nzi; 1018 fm = fill[n]; 1019 while (nzf--) { 1020 *xi++ = fm ; 1021 *flev++ = im[fm]; 1022 fm = fill[fm]; 1023 } 1024 /* make sure row has diagonal entry */ 1025 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1026 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1027 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1028 } 1029 } 1030 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1031 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1032 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1033 ierr = PetscFree(fill);CHKERRQ(ierr); 1034 ierr = PetscFree(im);CHKERRQ(ierr); 1035 1036 { 1037 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1038 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1039 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1040 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1041 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1042 if (diagonal_fill) { 1043 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1044 } 1045 } 1046 1047 /* put together the new matrix */ 1048 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1049 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1050 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1051 PetscLogObjectParent(*fact,isicol); 1052 b = (Mat_SeqAIJ*)(*fact)->data; 1053 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1054 b->singlemalloc = PETSC_FALSE; 1055 len = (ainew[n] )*sizeof(PetscScalar); 1056 /* the next line frees the default space generated by the Create() */ 1057 ierr = PetscFree(b->a);CHKERRQ(ierr); 1058 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1059 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1060 b->j = ajnew; 1061 b->i = ainew; 1062 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1063 b->diag = dloc; 1064 b->ilen = 0; 1065 b->imax = 0; 1066 b->row = isrow; 1067 b->col = iscol; 1068 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1069 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1070 b->icol = isicol; 1071 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1072 /* In b structure: Free imax, ilen, old a, old j. 1073 Allocate dloc, solve_work, new a, new j */ 1074 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar))); 1075 b->maxnz = b->nz = ainew[n] ; 1076 (*fact)->factor = FACTOR_LU; 1077 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1078 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1079 (*fact)->info.factor_mallocs = reallocs; 1080 (*fact)->info.fill_ratio_given = f; 1081 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1082 PetscFunctionReturn(0); 1083 } 1084 1085 #include "src/mat/impls/sbaij/seq/sbaij.h" 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1088 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1089 { 1090 Mat C = *B; 1091 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1092 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1093 IS ip=b->row; 1094 PetscErrorCode ierr; 1095 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1096 PetscInt *ai=a->i,*aj=a->j; 1097 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1098 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1099 PetscReal zeropivot,rs,shiftnz; 1100 PetscTruth shiftpd; 1101 ChShift_Ctx sctx; 1102 PetscInt newshift; 1103 1104 PetscFunctionBegin; 1105 shiftnz = info->shiftnz; 1106 shiftpd = info->shiftpd; 1107 zeropivot = info->zeropivot; 1108 1109 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1110 1111 /* initialization */ 1112 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1113 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1114 jl = il + mbs; 1115 rtmp = (MatScalar*)(jl + mbs); 1116 1117 sctx.shift_amount = 0; 1118 sctx.nshift = 0; 1119 do { 1120 sctx.chshift = PETSC_FALSE; 1121 for (i=0; i<mbs; i++) { 1122 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1123 } 1124 1125 for (k = 0; k<mbs; k++){ 1126 bval = ba + bi[k]; 1127 /* initialize k-th row by the perm[k]-th row of A */ 1128 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1129 for (j = jmin; j < jmax; j++){ 1130 col = rip[aj[j]]; 1131 if (col >= k){ /* only take upper triangular entry */ 1132 rtmp[col] = aa[j]; 1133 *bval++ = 0.0; /* for in-place factorization */ 1134 } 1135 } 1136 /* shift the diagonal of the matrix */ 1137 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1138 1139 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1140 dk = rtmp[k]; 1141 i = jl[k]; /* first row to be added to k_th row */ 1142 1143 while (i < k){ 1144 nexti = jl[i]; /* next row to be added to k_th row */ 1145 1146 /* compute multiplier, update diag(k) and U(i,k) */ 1147 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1148 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1149 dk += uikdi*ba[ili]; 1150 ba[ili] = uikdi; /* -U(i,k) */ 1151 1152 /* add multiple of row i to k-th row */ 1153 jmin = ili + 1; jmax = bi[i+1]; 1154 if (jmin < jmax){ 1155 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1156 /* update il and jl for row i */ 1157 il[i] = jmin; 1158 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1159 } 1160 i = nexti; 1161 } 1162 1163 /* shift the diagonals when zero pivot is detected */ 1164 /* compute rs=sum of abs(off-diagonal) */ 1165 rs = 0.0; 1166 jmin = bi[k]+1; 1167 nz = bi[k+1] - jmin; 1168 if (nz){ 1169 bcol = bj + jmin; 1170 while (nz--){ 1171 rs += PetscAbsScalar(rtmp[*bcol]); 1172 bcol++; 1173 } 1174 } 1175 1176 sctx.rs = rs; 1177 sctx.pv = dk; 1178 ierr = Mat_CholeskyCheckShift(info,&sctx,&newshift);CHKERRQ(ierr); 1179 if (newshift == 1){ 1180 break; /* sctx.shift_amount is updated */ 1181 } else if (newshift == -1){ 1182 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1183 } 1184 1185 /* copy data into U(k,:) */ 1186 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1187 jmin = bi[k]+1; jmax = bi[k+1]; 1188 if (jmin < jmax) { 1189 for (j=jmin; j<jmax; j++){ 1190 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1191 } 1192 /* add the k-th row into il and jl */ 1193 il[k] = jmin; 1194 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1195 } 1196 } 1197 } while (sctx.chshift); 1198 ierr = PetscFree(il);CHKERRQ(ierr); 1199 1200 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1201 C->factor = FACTOR_CHOLESKY; 1202 C->assembled = PETSC_TRUE; 1203 C->preallocated = PETSC_TRUE; 1204 PetscLogFlops(C->m); 1205 if (sctx.nshift){ 1206 if (shiftnz) { 1207 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1208 } else if (shiftpd) { 1209 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1210 } 1211 } 1212 PetscFunctionReturn(0); 1213 } 1214 1215 #include "petscbt.h" 1216 #include "src/mat/utils/freespace.h" 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1219 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1220 { 1221 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1222 Mat_SeqSBAIJ *b; 1223 Mat B; 1224 PetscErrorCode ierr; 1225 PetscTruth perm_identity; 1226 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1227 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1228 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1229 PetscInt ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1230 PetscReal fill=info->fill,levels=info->levels; 1231 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1232 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1233 PetscBT lnkbt; 1234 1235 PetscFunctionBegin; 1236 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1237 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1238 1239 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1240 ui[0] = 0; 1241 1242 /* special case that simply copies fill pattern */ 1243 if (!levels && perm_identity) { 1244 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1245 for (i=0; i<am; i++) { 1246 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1247 } 1248 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1249 cols = uj; 1250 for (i=0; i<am; i++) { 1251 aj = a->j + a->diag[i]; 1252 ncols = ui[i+1] - ui[i]; 1253 for (j=0; j<ncols; j++) *cols++ = *aj++; 1254 } 1255 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1256 /* initialization */ 1257 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 1258 1259 /* jl: linked list for storing indices of the pivot rows 1260 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1261 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1262 il = jl + am; 1263 uj_ptr = (PetscInt**)(il + am); 1264 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1265 for (i=0; i<am; i++){ 1266 jl[i] = am; il[i] = 0; 1267 } 1268 1269 /* create and initialize a linked list for storing column indices of the active row k */ 1270 nlnk = am + 1; 1271 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1272 1273 /* initial FreeSpace size is fill*(ai[am]+1) */ 1274 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1275 current_space = free_space; 1276 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1277 current_space_lvl = free_space_lvl; 1278 1279 for (k=0; k<am; k++){ /* for each active row k */ 1280 /* initialize lnk by the column indices of row rip[k] of A */ 1281 nzk = 0; 1282 ncols = ai[rip[k]+1] - ai[rip[k]]; 1283 ncols_upper = 0; 1284 cols = cols_lvl + am; 1285 for (j=0; j<ncols; j++){ 1286 i = rip[*(aj + ai[rip[k]] + j)]; 1287 if (i >= k){ /* only take upper triangular entry */ 1288 cols[ncols_upper] = i; 1289 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 1290 ncols_upper++; 1291 } 1292 } 1293 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1294 nzk += nlnk; 1295 1296 /* update lnk by computing fill-in for each pivot row to be merged in */ 1297 prow = jl[k]; /* 1st pivot row */ 1298 1299 while (prow < k){ 1300 nextprow = jl[prow]; 1301 1302 /* merge prow into k-th row */ 1303 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1304 jmax = ui[prow+1]; 1305 ncols = jmax-jmin; 1306 i = jmin - ui[prow]; 1307 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1308 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1309 ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1310 nzk += nlnk; 1311 1312 /* update il and jl for prow */ 1313 if (jmin < jmax){ 1314 il[prow] = jmin; 1315 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1316 } 1317 prow = nextprow; 1318 } 1319 1320 /* if free space is not available, make more free space */ 1321 if (current_space->local_remaining<nzk) { 1322 i = am - k + 1; /* num of unfactored rows */ 1323 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1324 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1325 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1326 reallocs++; 1327 } 1328 1329 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1330 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1331 1332 /* add the k-th row into il and jl */ 1333 if (nzk > 1){ 1334 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1335 jl[k] = jl[i]; jl[i] = k; 1336 il[k] = ui[k] + 1; 1337 } 1338 uj_ptr[k] = current_space->array; 1339 uj_lvl_ptr[k] = current_space_lvl->array; 1340 1341 current_space->array += nzk; 1342 current_space->local_used += nzk; 1343 current_space->local_remaining -= nzk; 1344 1345 current_space_lvl->array += nzk; 1346 current_space_lvl->local_used += nzk; 1347 current_space_lvl->local_remaining -= nzk; 1348 1349 ui[k+1] = ui[k] + nzk; 1350 } 1351 1352 if (ai[am] != 0) { 1353 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1354 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1355 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1356 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1357 } else { 1358 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1359 } 1360 1361 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1362 ierr = PetscFree(jl);CHKERRQ(ierr); 1363 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1364 1365 /* destroy list of free space and other temporary array(s) */ 1366 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1367 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1368 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1369 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1370 1371 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1372 1373 /* put together the new matrix in MATSEQSBAIJ format */ 1374 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1375 B = *fact; 1376 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1377 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1378 1379 b = (Mat_SeqSBAIJ*)B->data; 1380 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1381 b->singlemalloc = PETSC_FALSE; 1382 /* the next line frees the default space generated by the Create() */ 1383 ierr = PetscFree(b->a);CHKERRQ(ierr); 1384 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1385 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1386 b->j = uj; 1387 b->i = ui; 1388 b->diag = 0; 1389 b->ilen = 0; 1390 b->imax = 0; 1391 b->row = perm; 1392 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1393 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1394 b->icol = perm; 1395 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1396 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1397 PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 1398 b->maxnz = b->nz = ui[am]; 1399 1400 B->factor = FACTOR_CHOLESKY; 1401 B->info.factor_mallocs = reallocs; 1402 B->info.fill_ratio_given = fill; 1403 if (ai[am] != 0) { 1404 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1405 } else { 1406 B->info.fill_ratio_needed = 0.0; 1407 } 1408 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1409 if (perm_identity){ 1410 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1411 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1412 } 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1418 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1419 { 1420 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1421 Mat_SeqSBAIJ *b; 1422 Mat B; 1423 PetscErrorCode ierr; 1424 PetscTruth perm_identity; 1425 PetscReal fill = info->fill; 1426 PetscInt *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1427 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1428 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1429 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1430 PetscBT lnkbt; 1431 IS iperm; 1432 1433 PetscFunctionBegin; 1434 /* check whether perm is the identity mapping */ 1435 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1436 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1437 1438 if (!perm_identity){ 1439 /* check if perm is symmetric! */ 1440 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1441 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1442 for (i=0; i<mbs; i++) { 1443 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1444 } 1445 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1446 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1447 } 1448 1449 /* initialization */ 1450 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1451 ui[0] = 0; 1452 1453 /* jl: linked list for storing indices of the pivot rows 1454 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 1455 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1456 il = jl + mbs; 1457 cols = il + mbs; 1458 ui_ptr = (PetscInt**)(cols + mbs); 1459 for (i=0; i<mbs; i++){ 1460 jl[i] = mbs; il[i] = 0; 1461 } 1462 1463 /* create and initialize a linked list for storing column indices of the active row k */ 1464 nlnk = mbs + 1; 1465 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1466 1467 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 1468 ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 1469 current_space = free_space; 1470 1471 for (k=0; k<mbs; k++){ /* for each active row k */ 1472 /* initialize lnk by the column indices of row rip[k] of A */ 1473 nzk = 0; 1474 ncols = ai[rip[k]+1] - ai[rip[k]]; 1475 ncols_upper = 0; 1476 for (j=0; j<ncols; j++){ 1477 i = rip[*(aj + ai[rip[k]] + j)]; 1478 if (i >= k){ /* only take upper triangular entry */ 1479 cols[ncols_upper] = i; 1480 ncols_upper++; 1481 } 1482 } 1483 ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1484 nzk += nlnk; 1485 1486 /* update lnk by computing fill-in for each pivot row to be merged in */ 1487 prow = jl[k]; /* 1st pivot row */ 1488 1489 while (prow < k){ 1490 nextprow = jl[prow]; 1491 /* merge prow into k-th row */ 1492 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 1493 jmax = ui[prow+1]; 1494 ncols = jmax-jmin; 1495 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 1496 ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1497 nzk += nlnk; 1498 1499 /* update il and jl for prow */ 1500 if (jmin < jmax){ 1501 il[prow] = jmin; 1502 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1503 } 1504 prow = nextprow; 1505 } 1506 1507 /* if free space is not available, make more free space */ 1508 if (current_space->local_remaining<nzk) { 1509 i = mbs - k + 1; /* num of unfactored rows */ 1510 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1511 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1512 reallocs++; 1513 } 1514 1515 /* copy data into free space, then initialize lnk */ 1516 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1517 1518 /* add the k-th row into il and jl */ 1519 if (nzk-1 > 0){ 1520 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1521 jl[k] = jl[i]; jl[i] = k; 1522 il[k] = ui[k] + 1; 1523 } 1524 ui_ptr[k] = current_space->array; 1525 current_space->array += nzk; 1526 current_space->local_used += nzk; 1527 current_space->local_remaining -= nzk; 1528 1529 ui[k+1] = ui[k] + nzk; 1530 } 1531 1532 if (ai[mbs] != 0) { 1533 PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]); 1534 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1535 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1536 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1537 } else { 1538 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1539 } 1540 1541 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1542 ierr = PetscFree(jl);CHKERRQ(ierr); 1543 1544 /* destroy list of free space and other temporary array(s) */ 1545 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1546 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1547 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1548 1549 /* put together the new matrix in MATSEQSBAIJ format */ 1550 ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr); 1551 B = *fact; 1552 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1553 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1554 1555 b = (Mat_SeqSBAIJ*)B->data; 1556 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1557 b->singlemalloc = PETSC_FALSE; 1558 /* the next line frees the default space generated by the Create() */ 1559 ierr = PetscFree(b->a);CHKERRQ(ierr); 1560 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1561 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1562 b->j = uj; 1563 b->i = ui; 1564 b->diag = 0; 1565 b->ilen = 0; 1566 b->imax = 0; 1567 b->row = perm; 1568 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1569 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1570 b->icol = perm; 1571 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1572 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1573 PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 1574 b->maxnz = b->nz = ui[mbs]; 1575 1576 B->factor = FACTOR_CHOLESKY; 1577 B->info.factor_mallocs = reallocs; 1578 B->info.fill_ratio_given = fill; 1579 if (ai[mbs] != 0) { 1580 B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 1581 } else { 1582 B->info.fill_ratio_needed = 0.0; 1583 } 1584 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1585 if (perm_identity){ 1586 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1587 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1588 } 1589 PetscFunctionReturn(0); 1590 } 1591