1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 9 { 10 PetscFunctionBegin; 11 12 SETERRQ(PETSC_ERR_SUP,"Code not written"); 13 #if !defined(PETSC_USE_DEBUG) 14 PetscFunctionReturn(0); 15 #endif 16 } 17 18 19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 21 22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 28 /* ------------------------------------------------------------ 29 30 This interface was contribed by Tony Caola 31 32 This routine is an interface to the pivoting drop-tolerance 33 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 34 SPARSEKIT2. 35 36 The SPARSEKIT2 routines used here are covered by the GNU 37 copyright; see the file gnu in this directory. 38 39 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 40 help in getting this routine ironed out. 41 42 The major drawback to this routine is that if info->fill is 43 not large enough it fails rather than allocating more space; 44 this can be fixed by hacking/improving the f2c version of 45 Yousef Saad's code. 46 47 ------------------------------------------------------------ 48 */ 49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 50 { 51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 52 PetscFunctionBegin; 53 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 54 You can obtain the drop tolerance routines by installing PETSc from\n\ 55 www.mcs.anl.gov/petsc\n"); 56 #else 57 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 58 IS iscolf,isicol,isirow; 59 PetscTruth reorder; 60 PetscErrorCode ierr,sierr; 61 PetscInt *c,*r,*ic,i,n = A->m; 62 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63 PetscInt *ordcol,*iwk,*iperm,*jw; 64 PetscInt jmax,lfill,job,*o_i,*o_j; 65 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66 PetscReal af; 67 68 PetscFunctionBegin; 69 70 if (info->dt == PETSC_DEFAULT) info->dt = .005; 71 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 72 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 73 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 74 lfill = (PetscInt)(info->dtcount/2.0); 75 jmax = (PetscInt)(info->fill*a->nz); 76 77 78 /* ------------------------------------------------------------ 79 If reorder=.TRUE., then the original matrix has to be 80 reordered to reflect the user selected ordering scheme, and 81 then de-reordered so it is in it's original format. 82 Because Saad's dperm() is NOT in place, we have to copy 83 the original matrix and allocate more storage. . . 84 ------------------------------------------------------------ 85 */ 86 87 /* set reorder to true if either isrow or iscol is not identity */ 88 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 89 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 90 reorder = PetscNot(reorder); 91 92 93 /* storage for ilu factor */ 94 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 95 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 97 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 for (i=old_i[0];i<old_i[n];i++) { 104 old_j[i]++; 105 } 106 for(i=0;i<n+1;i++) { 107 old_i[i]++; 108 }; 109 110 111 if (reorder) { 112 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 113 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 114 for(i=0;i<n;i++) { 115 r[i] = r[i]+1; 116 c[i] = c[i]+1; 117 } 118 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 119 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 121 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 122 for (i=0;i<n;i++) { 123 r[i] = r[i]-1; 124 c[i] = c[i]-1; 125 } 126 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 127 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 128 o_a = old_a2; 129 o_j = old_j2; 130 o_i = old_i2; 131 } else { 132 o_a = old_a; 133 o_j = old_j; 134 o_i = old_i; 135 } 136 137 /* ------------------------------------------------------------ 138 Call Saad's ilutp() routine to generate the factorization 139 ------------------------------------------------------------ 140 */ 141 142 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 144 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 145 146 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 147 if (sierr) { 148 switch (sierr) { 149 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 150 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 152 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 153 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 154 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 155 } 156 } 157 158 ierr = PetscFree(w);CHKERRQ(ierr); 159 ierr = PetscFree(jw);CHKERRQ(ierr); 160 161 /* ------------------------------------------------------------ 162 Saad's routine gives the result in Modified Sparse Row (msr) 163 Convert to Compressed Sparse Row format (csr) 164 ------------------------------------------------------------ 165 */ 166 167 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 168 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 169 170 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 171 172 ierr = PetscFree(iwk);CHKERRQ(ierr); 173 ierr = PetscFree(wk);CHKERRQ(ierr); 174 175 if (reorder) { 176 ierr = PetscFree(old_a2);CHKERRQ(ierr); 177 ierr = PetscFree(old_j2);CHKERRQ(ierr); 178 ierr = PetscFree(old_i2);CHKERRQ(ierr); 179 } else { 180 /* fix permutation of old_j that the factorization introduced */ 181 for (i=old_i[0]; i<old_i[n]; i++) { 182 old_j[i-1] = iperm[old_j[i-1]-1]; 183 } 184 } 185 186 /* get rid of the shift to indices starting at 1 */ 187 for (i=0; i<n+1; i++) { 188 old_i[i]--; 189 } 190 for (i=old_i[0];i<old_i[n];i++) { 191 old_j[i]--; 192 } 193 194 /* Make the factored matrix 0-based */ 195 for (i=0; i<n+1; i++) { 196 new_i[i]--; 197 } 198 for (i=new_i[0];i<new_i[n];i++) { 199 new_j[i]--; 200 } 201 202 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 203 /*-- permute the right-hand-side and solution vectors --*/ 204 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 205 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 206 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 207 for(i=0; i<n; i++) { 208 ordcol[i] = ic[iperm[i]-1]; 209 }; 210 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 211 ierr = ISDestroy(isicol);CHKERRQ(ierr); 212 213 ierr = PetscFree(iperm);CHKERRQ(ierr); 214 215 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 216 ierr = PetscFree(ordcol);CHKERRQ(ierr); 217 218 /*----- put together the new matrix -----*/ 219 220 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 221 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 222 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 223 (*fact)->factor = FACTOR_LU; 224 (*fact)->assembled = PETSC_TRUE; 225 226 b = (Mat_SeqAIJ*)(*fact)->data; 227 ierr = PetscFree(b->imax);CHKERRQ(ierr); 228 b->sorted = PETSC_FALSE; 229 b->singlemalloc = PETSC_FALSE; 230 /* the next line frees the default space generated by the MatCreate() */ 231 ierr = PetscFree(b->a);CHKERRQ(ierr); 232 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 247 248 /* check out for identical nodes. If found, use inode functions */ 249 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 250 251 af = ((double)b->nz)/((double)a->nz) + .001; 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 /* 262 Factorization code for AIJ format. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 267 { 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 269 IS isicol; 270 PetscErrorCode ierr; 271 PetscInt *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j; 272 PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 273 PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im; 274 PetscReal f; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 284 ainew[0] = 0; 285 /* don't know how many column pointers are needed so estimate */ 286 f = info->fill; 287 jmax = (PetscInt)(f*ai[n]+1); 288 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 289 /* fill is a linked list of nonzeros in active row */ 290 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 291 im = fill + n + 1; 292 /* idnew is location of diagonal in factor */ 293 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr); 294 idnew[0] = 0; 295 296 for (i=0; i<n; i++) { 297 /* first copy previous fill into linked list */ 298 nnz = nz = ai[r[i]+1] - ai[r[i]]; 299 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 300 ajtmp = aj + ai[r[i]]; 301 fill[n] = n; 302 while (nz--) { 303 fm = n; 304 idx = ic[*ajtmp++]; 305 do { 306 m = fm; 307 fm = fill[m]; 308 } while (fm < idx); 309 fill[m] = idx; 310 fill[idx] = fm; 311 } 312 row = fill[n]; 313 while (row < i) { 314 ajtmp = ajnew + idnew[row] + 1; 315 nzbd = 1 + idnew[row] - ainew[row]; 316 nz = im[row] - nzbd; 317 fm = row; 318 while (nz-- > 0) { 319 idx = *ajtmp++ ; 320 nzbd++; 321 if (idx == i) im[row] = nzbd; 322 do { 323 m = fm; 324 fm = fill[m]; 325 } while (fm < idx); 326 if (fm != idx) { 327 fill[m] = idx; 328 fill[idx] = fm; 329 fm = idx; 330 nnz++; 331 } 332 } 333 row = fill[row]; 334 } 335 /* copy new filled row into permanent storage */ 336 ainew[i+1] = ainew[i] + nnz; 337 if (ainew[i+1] > jmax) { 338 /* estimate how much additional space we will need */ 339 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 340 /* just double the memory each time */ 341 PetscInt maxadd = jmax; 342 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 343 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 344 jmax += maxadd; 345 346 /* allocate a longer ajnew */ 347 ierr = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 348 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr); 349 ierr = PetscFree(ajnew);CHKERRQ(ierr); 350 ajnew = ajtmp; 351 reallocs++; /* count how many times we realloc */ 352 } 353 ajtmp = ajnew + ainew[i]; 354 fm = fill[n]; 355 nzi = 0; 356 im[i] = nnz; 357 while (nnz--) { 358 if (fm < i) nzi++; 359 *ajtmp++ = fm ; 360 fm = fill[fm]; 361 } 362 idnew[i] = ainew[i] + nzi; 363 } 364 if (ai[n] != 0) { 365 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 366 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 370 } else { 371 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 372 } 373 374 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 375 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 376 377 ierr = PetscFree(fill);CHKERRQ(ierr); 378 379 /* put together the new matrix */ 380 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 381 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 382 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 383 PetscLogObjectParent(*B,isicol); 384 b = (Mat_SeqAIJ*)(*B)->data; 385 ierr = PetscFree(b->imax);CHKERRQ(ierr); 386 b->singlemalloc = PETSC_FALSE; 387 /* the next line frees the default space generated by the Create() */ 388 ierr = PetscFree(b->a);CHKERRQ(ierr); 389 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 390 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 391 b->j = ajnew; 392 b->i = ainew; 393 b->diag = idnew; 394 b->ilen = 0; 395 b->imax = 0; 396 b->row = isrow; 397 b->col = iscol; 398 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 399 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 400 b->icol = isicol; 401 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 402 /* In b structure: Free imax, ilen, old a, old j. 403 Allocate idnew, solve_work, new a, new j */ 404 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar))); 405 b->maxnz = b->nz = ainew[n] ; 406 407 (*B)->factor = FACTOR_LU; 408 (*B)->info.factor_mallocs = reallocs; 409 (*B)->info.fill_ratio_given = f; 410 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 411 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 412 413 if (ai[n] != 0) { 414 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 415 } else { 416 (*B)->info.fill_ratio_needed = 0.0; 417 } 418 PetscFunctionReturn(0); 419 } 420 /* ----------------------------------------------------------- */ 421 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 425 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 426 { 427 Mat C=*B; 428 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 429 IS isrow = b->row,isicol = b->icol; 430 PetscErrorCode ierr; 431 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 432 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 433 PetscInt *diag_offset = b->diag,diag,*pj; 434 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 435 PetscReal zeropivot,rs,d,shift_nz; 436 PetscReal row_shift,shift_top=0.; 437 PetscTruth shift_pd; 438 LUShift_Ctx sctx; 439 PetscInt newshift; 440 441 PetscFunctionBegin; 442 shift_nz = info->shiftnz; 443 shift_pd = info->shiftpd; 444 zeropivot = info->zeropivot; 445 446 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 447 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 448 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 449 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 450 rtmps = rtmp; ics = ic; 451 452 if (!a->diag) { 453 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 454 } 455 /* if both shift schemes are chosen by user, only use shift_pd */ 456 if (shift_pd && shift_nz) shift_nz = 0.0; 457 if (shift_pd) { /* set shift_top=max{row_shift} */ 458 PetscInt *aai = a->i,*ddiag = a->diag; 459 shift_top = 0; 460 for (i=0; i<n; i++) { 461 d = PetscAbsScalar((a->a)[ddiag[i]]); 462 /* calculate amt of shift needed for this row */ 463 if (d<=0) { 464 row_shift = 0; 465 } else { 466 row_shift = -2*d; 467 } 468 v = a->a+aai[i]; 469 nz = aai[i+1] - aai[i]; 470 for (j=0; j<nz; j++) 471 row_shift += PetscAbsScalar(v[j]); 472 if (row_shift>shift_top) shift_top = row_shift; 473 } 474 } 475 476 sctx.shift_amount = 0; 477 sctx.shift_top = shift_top; 478 sctx.nshift = 0; 479 sctx.nshift_max = 5; 480 sctx.shift_lo = 0.; 481 sctx.shift_hi = 1.; 482 do { 483 sctx.lushift = PETSC_FALSE; 484 for (i=0; i<n; i++){ 485 nz = bi[i+1] - bi[i]; 486 bjtmp = bj + bi[i]; 487 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 488 489 /* load in initial (unfactored row) */ 490 nz = a->i[r[i]+1] - a->i[r[i]]; 491 ajtmp = a->j + a->i[r[i]]; 492 v = a->a + a->i[r[i]]; 493 for (j=0; j<nz; j++) { 494 rtmp[ics[ajtmp[j]]] = v[j]; 495 } 496 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 497 498 row = *bjtmp++; 499 while (row < i) { 500 pc = rtmp + row; 501 if (*pc != 0.0) { 502 pv = b->a + diag_offset[row]; 503 pj = b->j + diag_offset[row] + 1; 504 multiplier = *pc / *pv++; 505 *pc = multiplier; 506 nz = bi[row+1] - diag_offset[row] - 1; 507 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 508 PetscLogFlops(2*nz); 509 } 510 row = *bjtmp++; 511 } 512 /* finished row so stick it into b->a */ 513 pv = b->a + bi[i] ; 514 pj = b->j + bi[i] ; 515 nz = bi[i+1] - bi[i]; 516 diag = diag_offset[i] - bi[i]; 517 rs = 0.0; 518 for (j=0; j<nz; j++) { 519 pv[j] = rtmps[pj[j]]; 520 if (j != diag) rs += PetscAbsScalar(pv[j]); 521 } 522 523 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 524 sctx.rs = rs; 525 sctx.pv = pv[diag]; 526 ierr = Mat_LUCheckShift(info,&sctx,&newshift);CHKERRQ(ierr); 527 if (newshift == 1){ 528 break; /* sctx.shift_amount is updated */ 529 } else if (newshift == -1){ 530 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 531 } 532 } 533 534 if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 535 /* 536 * if no shift in this attempt & shifting & started shifting & can refine, 537 * then try lower shift 538 */ 539 sctx.shift_hi = info->shift_fraction; 540 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 541 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 542 sctx.lushift = PETSC_TRUE; 543 sctx.nshift++; 544 } 545 } while (sctx.lushift); 546 547 /* invert diagonal entries for simplier triangular solves */ 548 for (i=0; i<n; i++) { 549 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 550 } 551 552 ierr = PetscFree(rtmp);CHKERRQ(ierr); 553 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 554 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 555 C->factor = FACTOR_LU; 556 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 557 C->assembled = PETSC_TRUE; 558 PetscLogFlops(C->n); 559 if (sctx.nshift){ 560 if (shift_nz) { 561 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 562 } else if (shift_pd) { 563 b->lu_shift_fraction = info->shift_fraction; 564 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",info->shift_fraction,shift_top,sctx.nshift); 565 } 566 } 567 PetscFunctionReturn(0); 568 } 569 570 #undef __FUNCT__ 571 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 572 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 573 { 574 PetscFunctionBegin; 575 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 576 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 577 PetscFunctionReturn(0); 578 } 579 580 581 /* ----------------------------------------------------------- */ 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatLUFactor_SeqAIJ" 584 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 585 { 586 PetscErrorCode ierr; 587 Mat C; 588 589 PetscFunctionBegin; 590 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 591 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 592 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 593 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 594 PetscFunctionReturn(0); 595 } 596 /* ----------------------------------------------------------- */ 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatSolve_SeqAIJ" 599 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 600 { 601 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 602 IS iscol = a->col,isrow = a->row; 603 PetscErrorCode ierr; 604 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 605 PetscInt nz,*rout,*cout; 606 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 607 608 PetscFunctionBegin; 609 if (!n) PetscFunctionReturn(0); 610 611 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 612 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 613 tmp = a->solve_work; 614 615 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 616 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 617 618 /* forward solve the lower triangular */ 619 tmp[0] = b[*r++]; 620 tmps = tmp; 621 for (i=1; i<n; i++) { 622 v = aa + ai[i] ; 623 vi = aj + ai[i] ; 624 nz = a->diag[i] - ai[i]; 625 sum = b[*r++]; 626 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 627 tmp[i] = sum; 628 } 629 630 /* backward solve the upper triangular */ 631 for (i=n-1; i>=0; i--){ 632 v = aa + a->diag[i] + 1; 633 vi = aj + a->diag[i] + 1; 634 nz = ai[i+1] - a->diag[i] - 1; 635 sum = tmp[i]; 636 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 637 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 638 } 639 640 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 641 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 642 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 643 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 644 PetscLogFlops(2*a->nz - A->n); 645 PetscFunctionReturn(0); 646 } 647 648 /* ----------------------------------------------------------- */ 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 651 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 652 { 653 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 654 PetscErrorCode ierr; 655 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 656 PetscScalar *x,*b,*aa = a->a; 657 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 658 PetscInt adiag_i,i,*vi,nz,ai_i; 659 PetscScalar *v,sum; 660 #endif 661 662 PetscFunctionBegin; 663 if (!n) PetscFunctionReturn(0); 664 665 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 666 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 667 668 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 669 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 670 #else 671 /* forward solve the lower triangular */ 672 x[0] = b[0]; 673 for (i=1; i<n; i++) { 674 ai_i = ai[i]; 675 v = aa + ai_i; 676 vi = aj + ai_i; 677 nz = adiag[i] - ai_i; 678 sum = b[i]; 679 while (nz--) sum -= *v++ * x[*vi++]; 680 x[i] = sum; 681 } 682 683 /* backward solve the upper triangular */ 684 for (i=n-1; i>=0; i--){ 685 adiag_i = adiag[i]; 686 v = aa + adiag_i + 1; 687 vi = aj + adiag_i + 1; 688 nz = ai[i+1] - adiag_i - 1; 689 sum = x[i]; 690 while (nz--) sum -= *v++ * x[*vi++]; 691 x[i] = sum*aa[adiag_i]; 692 } 693 #endif 694 PetscLogFlops(2*a->nz - A->n); 695 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 696 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 697 PetscFunctionReturn(0); 698 } 699 700 #undef __FUNCT__ 701 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 702 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 703 { 704 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 705 IS iscol = a->col,isrow = a->row; 706 PetscErrorCode ierr; 707 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 708 PetscInt nz,*rout,*cout; 709 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 710 711 PetscFunctionBegin; 712 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 713 714 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 715 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 716 tmp = a->solve_work; 717 718 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 719 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 720 721 /* forward solve the lower triangular */ 722 tmp[0] = b[*r++]; 723 for (i=1; i<n; i++) { 724 v = aa + ai[i] ; 725 vi = aj + ai[i] ; 726 nz = a->diag[i] - ai[i]; 727 sum = b[*r++]; 728 while (nz--) sum -= *v++ * tmp[*vi++ ]; 729 tmp[i] = sum; 730 } 731 732 /* backward solve the upper triangular */ 733 for (i=n-1; i>=0; i--){ 734 v = aa + a->diag[i] + 1; 735 vi = aj + a->diag[i] + 1; 736 nz = ai[i+1] - a->diag[i] - 1; 737 sum = tmp[i]; 738 while (nz--) sum -= *v++ * tmp[*vi++ ]; 739 tmp[i] = sum*aa[a->diag[i]]; 740 x[*c--] += tmp[i]; 741 } 742 743 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 744 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 745 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 746 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 747 PetscLogFlops(2*a->nz); 748 749 PetscFunctionReturn(0); 750 } 751 /* -------------------------------------------------------------------*/ 752 #undef __FUNCT__ 753 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 754 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 755 { 756 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 757 IS iscol = a->col,isrow = a->row; 758 PetscErrorCode ierr; 759 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 760 PetscInt nz,*rout,*cout,*diag = a->diag; 761 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 762 763 PetscFunctionBegin; 764 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 765 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 766 tmp = a->solve_work; 767 768 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 769 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 770 771 /* copy the b into temp work space according to permutation */ 772 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 773 774 /* forward solve the U^T */ 775 for (i=0; i<n; i++) { 776 v = aa + diag[i] ; 777 vi = aj + diag[i] + 1; 778 nz = ai[i+1] - diag[i] - 1; 779 s1 = tmp[i]; 780 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 781 while (nz--) { 782 tmp[*vi++ ] -= (*v++)*s1; 783 } 784 tmp[i] = s1; 785 } 786 787 /* backward solve the L^T */ 788 for (i=n-1; i>=0; i--){ 789 v = aa + diag[i] - 1 ; 790 vi = aj + diag[i] - 1 ; 791 nz = diag[i] - ai[i]; 792 s1 = tmp[i]; 793 while (nz--) { 794 tmp[*vi-- ] -= (*v--)*s1; 795 } 796 } 797 798 /* copy tmp into x according to permutation */ 799 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 800 801 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 802 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 803 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 804 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 805 806 PetscLogFlops(2*a->nz-A->n); 807 PetscFunctionReturn(0); 808 } 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 812 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 813 { 814 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 815 IS iscol = a->col,isrow = a->row; 816 PetscErrorCode ierr; 817 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 818 PetscInt nz,*rout,*cout,*diag = a->diag; 819 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 820 821 PetscFunctionBegin; 822 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 823 824 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 825 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 826 tmp = a->solve_work; 827 828 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 829 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 830 831 /* copy the b into temp work space according to permutation */ 832 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 833 834 /* forward solve the U^T */ 835 for (i=0; i<n; i++) { 836 v = aa + diag[i] ; 837 vi = aj + diag[i] + 1; 838 nz = ai[i+1] - diag[i] - 1; 839 tmp[i] *= *v++; 840 while (nz--) { 841 tmp[*vi++ ] -= (*v++)*tmp[i]; 842 } 843 } 844 845 /* backward solve the L^T */ 846 for (i=n-1; i>=0; i--){ 847 v = aa + diag[i] - 1 ; 848 vi = aj + diag[i] - 1 ; 849 nz = diag[i] - ai[i]; 850 while (nz--) { 851 tmp[*vi-- ] -= (*v--)*tmp[i]; 852 } 853 } 854 855 /* copy tmp into x according to permutation */ 856 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 857 858 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 859 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 860 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 861 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 862 863 PetscLogFlops(2*a->nz); 864 PetscFunctionReturn(0); 865 } 866 /* ----------------------------------------------------------------*/ 867 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 868 869 #undef __FUNCT__ 870 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 871 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 872 { 873 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 874 IS isicol; 875 PetscErrorCode ierr; 876 PetscInt *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j; 877 PetscInt *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 878 PetscInt *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0; 879 PetscInt incrlev,nnz,i,levels,diagonal_fill; 880 PetscTruth col_identity,row_identity; 881 PetscReal f; 882 883 PetscFunctionBegin; 884 f = info->fill; 885 levels = (PetscInt)info->levels; 886 diagonal_fill = (PetscInt)info->diagonal_fill; 887 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 888 889 /* special case that simply copies fill pattern */ 890 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 891 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 892 if (!levels && row_identity && col_identity) { 893 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 894 (*fact)->factor = FACTOR_LU; 895 b = (Mat_SeqAIJ*)(*fact)->data; 896 if (!b->diag) { 897 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 898 } 899 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 900 b->row = isrow; 901 b->col = iscol; 902 b->icol = isicol; 903 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 904 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 905 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 906 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 907 PetscFunctionReturn(0); 908 } 909 910 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 911 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 912 913 /* get new row pointers */ 914 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 915 ainew[0] = 0; 916 /* don't know how many column pointers are needed so estimate */ 917 jmax = (PetscInt)(f*(ai[n]+1)); 918 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 919 /* ajfill is level of fill for each fill entry */ 920 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr); 921 /* fill is a linked list of nonzeros in active row */ 922 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 923 /* im is level for each filled value */ 924 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 925 /* dloc is location of diagonal in factor */ 926 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr); 927 dloc[0] = 0; 928 for (prow=0; prow<n; prow++) { 929 930 /* copy current row into linked list */ 931 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 932 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 933 xi = aj + ai[r[prow]]; 934 fill[n] = n; 935 fill[prow] = -1; /* marker to indicate if diagonal exists */ 936 while (nz--) { 937 fm = n; 938 idx = ic[*xi++ ]; 939 do { 940 m = fm; 941 fm = fill[m]; 942 } while (fm < idx); 943 fill[m] = idx; 944 fill[idx] = fm; 945 im[idx] = 0; 946 } 947 948 /* make sure diagonal entry is included */ 949 if (diagonal_fill && fill[prow] == -1) { 950 fm = n; 951 while (fill[fm] < prow) fm = fill[fm]; 952 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 953 fill[fm] = prow; 954 im[prow] = 0; 955 nzf++; 956 dcount++; 957 } 958 959 nzi = 0; 960 row = fill[n]; 961 while (row < prow) { 962 incrlev = im[row] + 1; 963 nz = dloc[row]; 964 xi = ajnew + ainew[row] + nz + 1; 965 flev = ajfill + ainew[row] + nz + 1; 966 nnz = ainew[row+1] - ainew[row] - nz - 1; 967 fm = row; 968 while (nnz-- > 0) { 969 idx = *xi++ ; 970 if (*flev + incrlev > levels) { 971 flev++; 972 continue; 973 } 974 do { 975 m = fm; 976 fm = fill[m]; 977 } while (fm < idx); 978 if (fm != idx) { 979 im[idx] = *flev + incrlev; 980 fill[m] = idx; 981 fill[idx] = fm; 982 fm = idx; 983 nzf++; 984 } else { 985 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 986 } 987 flev++; 988 } 989 row = fill[row]; 990 nzi++; 991 } 992 /* copy new filled row into permanent storage */ 993 ainew[prow+1] = ainew[prow] + nzf; 994 if (ainew[prow+1] > jmax) { 995 996 /* estimate how much additional space we will need */ 997 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 998 /* just double the memory each time */ 999 /* maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1000 PetscInt maxadd = jmax; 1001 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1002 jmax += maxadd; 1003 1004 /* allocate a longer ajnew and ajfill */ 1005 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1006 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1007 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1008 ajnew = xi; 1009 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1010 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1011 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1012 ajfill = xi; 1013 reallocs++; /* count how many times we realloc */ 1014 } 1015 xi = ajnew + ainew[prow] ; 1016 flev = ajfill + ainew[prow] ; 1017 dloc[prow] = nzi; 1018 fm = fill[n]; 1019 while (nzf--) { 1020 *xi++ = fm ; 1021 *flev++ = im[fm]; 1022 fm = fill[fm]; 1023 } 1024 /* make sure row has diagonal entry */ 1025 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1026 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1027 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1028 } 1029 } 1030 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1031 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1032 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1033 ierr = PetscFree(fill);CHKERRQ(ierr); 1034 ierr = PetscFree(im);CHKERRQ(ierr); 1035 1036 { 1037 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1038 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1039 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1040 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1041 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1042 if (diagonal_fill) { 1043 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1044 } 1045 } 1046 1047 /* put together the new matrix */ 1048 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1049 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1050 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1051 PetscLogObjectParent(*fact,isicol); 1052 b = (Mat_SeqAIJ*)(*fact)->data; 1053 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1054 b->singlemalloc = PETSC_FALSE; 1055 len = (ainew[n] )*sizeof(PetscScalar); 1056 /* the next line frees the default space generated by the Create() */ 1057 ierr = PetscFree(b->a);CHKERRQ(ierr); 1058 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1059 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1060 b->j = ajnew; 1061 b->i = ainew; 1062 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1063 b->diag = dloc; 1064 b->ilen = 0; 1065 b->imax = 0; 1066 b->row = isrow; 1067 b->col = iscol; 1068 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1069 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1070 b->icol = isicol; 1071 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1072 /* In b structure: Free imax, ilen, old a, old j. 1073 Allocate dloc, solve_work, new a, new j */ 1074 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar))); 1075 b->maxnz = b->nz = ainew[n] ; 1076 (*fact)->factor = FACTOR_LU; 1077 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1078 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1079 (*fact)->info.factor_mallocs = reallocs; 1080 (*fact)->info.fill_ratio_given = f; 1081 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1082 PetscFunctionReturn(0); 1083 } 1084 1085 #include "src/mat/impls/sbaij/seq/sbaij.h" 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1088 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1089 { 1090 Mat C = *B; 1091 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1092 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1093 IS ip=b->row; 1094 PetscErrorCode ierr; 1095 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1096 PetscInt *ai=a->i,*aj=a->j; 1097 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1098 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1099 PetscReal zeropivot,rs,shiftnz; 1100 PetscTruth shiftpd; 1101 ChShift_Ctx sctx; 1102 PetscInt newshift; 1103 1104 PetscFunctionBegin; 1105 shiftnz = info->shiftnz; 1106 shiftpd = info->shiftpd; 1107 zeropivot = info->zeropivot; 1108 1109 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1110 1111 /* initialization */ 1112 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1113 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1114 jl = il + mbs; 1115 rtmp = (MatScalar*)(jl + mbs); 1116 1117 sctx.shift_amount = 0; 1118 sctx.nshift = 0; 1119 do { 1120 sctx.chshift = PETSC_FALSE; 1121 for (i=0; i<mbs; i++) { 1122 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1123 } 1124 1125 for (k = 0; k<mbs; k++){ 1126 bval = ba + bi[k]; 1127 /* initialize k-th row by the perm[k]-th row of A */ 1128 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1129 for (j = jmin; j < jmax; j++){ 1130 col = rip[aj[j]]; 1131 if (col >= k){ /* only take upper triangular entry */ 1132 rtmp[col] = aa[j]; 1133 *bval++ = 0.0; /* for in-place factorization */ 1134 } 1135 } 1136 /* shift the diagonal of the matrix */ 1137 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1138 1139 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1140 dk = rtmp[k]; 1141 i = jl[k]; /* first row to be added to k_th row */ 1142 1143 while (i < k){ 1144 nexti = jl[i]; /* next row to be added to k_th row */ 1145 1146 /* compute multiplier, update diag(k) and U(i,k) */ 1147 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1148 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1149 dk += uikdi*ba[ili]; 1150 ba[ili] = uikdi; /* -U(i,k) */ 1151 1152 /* add multiple of row i to k-th row */ 1153 jmin = ili + 1; jmax = bi[i+1]; 1154 if (jmin < jmax){ 1155 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1156 /* update il and jl for row i */ 1157 il[i] = jmin; 1158 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1159 } 1160 i = nexti; 1161 } 1162 1163 /* shift the diagonals when zero pivot is detected */ 1164 /* compute rs=sum of abs(off-diagonal) */ 1165 rs = 0.0; 1166 jmin = bi[k]+1; 1167 nz = bi[k+1] - jmin; 1168 if (nz){ 1169 bcol = bj + jmin; 1170 while (nz--){ 1171 rs += PetscAbsScalar(rtmp[*bcol++]); 1172 } 1173 } 1174 1175 sctx.rs = rs; 1176 sctx.pv = dk; 1177 ierr = Mat_CholeskyCheckShift(info,&sctx,&newshift);CHKERRQ(ierr); 1178 if (newshift == 1){ 1179 break; /* sctx.shift_amount is updated */ 1180 } else if (newshift == -1){ 1181 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1182 } 1183 1184 /* copy data into U(k,:) */ 1185 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1186 jmin = bi[k]+1; jmax = bi[k+1]; 1187 if (jmin < jmax) { 1188 for (j=jmin; j<jmax; j++){ 1189 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1190 } 1191 /* add the k-th row into il and jl */ 1192 il[k] = jmin; 1193 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1194 } 1195 } 1196 } while (sctx.chshift); 1197 ierr = PetscFree(il);CHKERRQ(ierr); 1198 1199 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1200 C->factor = FACTOR_CHOLESKY; 1201 C->assembled = PETSC_TRUE; 1202 C->preallocated = PETSC_TRUE; 1203 PetscLogFlops(C->m); 1204 if (sctx.nshift){ 1205 if (shiftnz) { 1206 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1207 } else if (shiftpd) { 1208 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1209 } 1210 } 1211 PetscFunctionReturn(0); 1212 } 1213 1214 #include "petscbt.h" 1215 #include "src/mat/utils/freespace.h" 1216 #undef __FUNCT__ 1217 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1218 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1219 { 1220 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1221 Mat_SeqSBAIJ *b; 1222 Mat B; 1223 PetscErrorCode ierr; 1224 PetscTruth perm_identity; 1225 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1226 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1227 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1228 PetscInt ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1229 PetscReal fill=info->fill,levels=info->levels; 1230 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1231 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1232 PetscBT lnkbt; 1233 1234 PetscFunctionBegin; 1235 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1236 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1237 1238 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1239 ui[0] = 0; 1240 1241 /* special case that simply copies fill pattern */ 1242 if (!levels && perm_identity) { 1243 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1244 for (i=0; i<am; i++) { 1245 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1246 } 1247 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1248 cols = uj; 1249 for (i=0; i<am; i++) { 1250 aj = a->j + a->diag[i]; 1251 ncols = ui[i+1] - ui[i]; 1252 for (j=0; j<ncols; j++) *cols++ = *aj++; 1253 } 1254 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1255 /* initialization */ 1256 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 1257 1258 /* jl: linked list for storing indices of the pivot rows 1259 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1260 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1261 il = jl + am; 1262 uj_ptr = (PetscInt**)(il + am); 1263 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1264 for (i=0; i<am; i++){ 1265 jl[i] = am; il[i] = 0; 1266 } 1267 1268 /* create and initialize a linked list for storing column indices of the active row k */ 1269 nlnk = am + 1; 1270 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1271 1272 /* initial FreeSpace size is fill*(ai[am]+1) */ 1273 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1274 current_space = free_space; 1275 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1276 current_space_lvl = free_space_lvl; 1277 1278 for (k=0; k<am; k++){ /* for each active row k */ 1279 /* initialize lnk by the column indices of row rip[k] of A */ 1280 nzk = 0; 1281 ncols = ai[rip[k]+1] - ai[rip[k]]; 1282 ncols_upper = 0; 1283 cols = cols_lvl + am; 1284 for (j=0; j<ncols; j++){ 1285 i = rip[*(aj + ai[rip[k]] + j)]; 1286 if (i >= k){ /* only take upper triangular entry */ 1287 cols[ncols_upper] = i; 1288 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 1289 ncols_upper++; 1290 } 1291 } 1292 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1293 nzk += nlnk; 1294 1295 /* update lnk by computing fill-in for each pivot row to be merged in */ 1296 prow = jl[k]; /* 1st pivot row */ 1297 1298 while (prow < k){ 1299 nextprow = jl[prow]; 1300 1301 /* merge prow into k-th row */ 1302 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1303 jmax = ui[prow+1]; 1304 ncols = jmax-jmin; 1305 i = jmin - ui[prow]; 1306 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1307 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1308 ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1309 nzk += nlnk; 1310 1311 /* update il and jl for prow */ 1312 if (jmin < jmax){ 1313 il[prow] = jmin; 1314 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1315 } 1316 prow = nextprow; 1317 } 1318 1319 /* if free space is not available, make more free space */ 1320 if (current_space->local_remaining<nzk) { 1321 i = am - k + 1; /* num of unfactored rows */ 1322 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1323 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1324 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1325 reallocs++; 1326 } 1327 1328 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1329 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1330 1331 /* add the k-th row into il and jl */ 1332 if (nzk > 1){ 1333 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1334 jl[k] = jl[i]; jl[i] = k; 1335 il[k] = ui[k] + 1; 1336 } 1337 uj_ptr[k] = current_space->array; 1338 uj_lvl_ptr[k] = current_space_lvl->array; 1339 1340 current_space->array += nzk; 1341 current_space->local_used += nzk; 1342 current_space->local_remaining -= nzk; 1343 1344 current_space_lvl->array += nzk; 1345 current_space_lvl->local_used += nzk; 1346 current_space_lvl->local_remaining -= nzk; 1347 1348 ui[k+1] = ui[k] + nzk; 1349 } 1350 1351 if (ai[am] != 0) { 1352 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1353 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1354 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1355 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1356 } else { 1357 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1358 } 1359 1360 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1361 ierr = PetscFree(jl);CHKERRQ(ierr); 1362 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1363 1364 /* destroy list of free space and other temporary array(s) */ 1365 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1366 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1367 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1368 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1369 1370 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1371 1372 /* put together the new matrix in MATSEQSBAIJ format */ 1373 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1374 B = *fact; 1375 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1376 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1377 1378 b = (Mat_SeqSBAIJ*)B->data; 1379 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1380 b->singlemalloc = PETSC_FALSE; 1381 /* the next line frees the default space generated by the Create() */ 1382 ierr = PetscFree(b->a);CHKERRQ(ierr); 1383 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1384 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1385 b->j = uj; 1386 b->i = ui; 1387 b->diag = 0; 1388 b->ilen = 0; 1389 b->imax = 0; 1390 b->row = perm; 1391 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1392 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1393 b->icol = perm; 1394 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1395 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1396 PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 1397 b->maxnz = b->nz = ui[am]; 1398 1399 B->factor = FACTOR_CHOLESKY; 1400 B->info.factor_mallocs = reallocs; 1401 B->info.fill_ratio_given = fill; 1402 if (ai[am] != 0) { 1403 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1404 } else { 1405 B->info.fill_ratio_needed = 0.0; 1406 } 1407 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1408 if (perm_identity){ 1409 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1410 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1411 } 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1417 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1418 { 1419 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1420 Mat_SeqSBAIJ *b; 1421 Mat B; 1422 PetscErrorCode ierr; 1423 PetscTruth perm_identity; 1424 PetscReal fill = info->fill; 1425 PetscInt *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1426 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1427 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1428 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1429 PetscBT lnkbt; 1430 IS iperm; 1431 1432 PetscFunctionBegin; 1433 /* check whether perm is the identity mapping */ 1434 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1435 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1436 1437 if (!perm_identity){ 1438 /* check if perm is symmetric! */ 1439 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1440 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1441 for (i=0; i<mbs; i++) { 1442 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1443 } 1444 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1445 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1446 } 1447 1448 /* initialization */ 1449 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1450 ui[0] = 0; 1451 1452 /* jl: linked list for storing indices of the pivot rows 1453 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 1454 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1455 il = jl + mbs; 1456 cols = il + mbs; 1457 ui_ptr = (PetscInt**)(cols + mbs); 1458 for (i=0; i<mbs; i++){ 1459 jl[i] = mbs; il[i] = 0; 1460 } 1461 1462 /* create and initialize a linked list for storing column indices of the active row k */ 1463 nlnk = mbs + 1; 1464 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1465 1466 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 1467 ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 1468 current_space = free_space; 1469 1470 for (k=0; k<mbs; k++){ /* for each active row k */ 1471 /* initialize lnk by the column indices of row rip[k] of A */ 1472 nzk = 0; 1473 ncols = ai[rip[k]+1] - ai[rip[k]]; 1474 ncols_upper = 0; 1475 for (j=0; j<ncols; j++){ 1476 i = rip[*(aj + ai[rip[k]] + j)]; 1477 if (i >= k){ /* only take upper triangular entry */ 1478 cols[ncols_upper] = i; 1479 ncols_upper++; 1480 } 1481 } 1482 ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1483 nzk += nlnk; 1484 1485 /* update lnk by computing fill-in for each pivot row to be merged in */ 1486 prow = jl[k]; /* 1st pivot row */ 1487 1488 while (prow < k){ 1489 nextprow = jl[prow]; 1490 /* merge prow into k-th row */ 1491 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 1492 jmax = ui[prow+1]; 1493 ncols = jmax-jmin; 1494 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 1495 ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1496 nzk += nlnk; 1497 1498 /* update il and jl for prow */ 1499 if (jmin < jmax){ 1500 il[prow] = jmin; 1501 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1502 } 1503 prow = nextprow; 1504 } 1505 1506 /* if free space is not available, make more free space */ 1507 if (current_space->local_remaining<nzk) { 1508 i = mbs - k + 1; /* num of unfactored rows */ 1509 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1510 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1511 reallocs++; 1512 } 1513 1514 /* copy data into free space, then initialize lnk */ 1515 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1516 1517 /* add the k-th row into il and jl */ 1518 if (nzk-1 > 0){ 1519 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1520 jl[k] = jl[i]; jl[i] = k; 1521 il[k] = ui[k] + 1; 1522 } 1523 ui_ptr[k] = current_space->array; 1524 current_space->array += nzk; 1525 current_space->local_used += nzk; 1526 current_space->local_remaining -= nzk; 1527 1528 ui[k+1] = ui[k] + nzk; 1529 } 1530 1531 if (ai[mbs] != 0) { 1532 PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]); 1533 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1534 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1535 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1536 } else { 1537 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1538 } 1539 1540 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1541 ierr = PetscFree(jl);CHKERRQ(ierr); 1542 1543 /* destroy list of free space and other temporary array(s) */ 1544 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1545 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1546 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1547 1548 /* put together the new matrix in MATSEQSBAIJ format */ 1549 ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr); 1550 B = *fact; 1551 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1552 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1553 1554 b = (Mat_SeqSBAIJ*)B->data; 1555 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1556 b->singlemalloc = PETSC_FALSE; 1557 /* the next line frees the default space generated by the Create() */ 1558 ierr = PetscFree(b->a);CHKERRQ(ierr); 1559 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1560 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1561 b->j = uj; 1562 b->i = ui; 1563 b->diag = 0; 1564 b->ilen = 0; 1565 b->imax = 0; 1566 b->row = perm; 1567 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1568 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1569 b->icol = perm; 1570 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1571 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1572 PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 1573 b->maxnz = b->nz = ui[mbs]; 1574 1575 B->factor = FACTOR_CHOLESKY; 1576 B->info.factor_mallocs = reallocs; 1577 B->info.fill_ratio_given = fill; 1578 if (ai[mbs] != 0) { 1579 B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 1580 } else { 1581 B->info.fill_ratio_needed = 0.0; 1582 } 1583 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1584 if (perm_identity){ 1585 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1586 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1587 } 1588 PetscFunctionReturn(0); 1589 } 1590