1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 #include "petscbt.h" 6 #include "src/mat/utils/freespace.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 10 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 11 { 12 PetscFunctionBegin; 13 14 SETERRQ(PETSC_ERR_SUP,"Code not written"); 15 #if !defined(PETSC_USE_DEBUG) 16 PetscFunctionReturn(0); 17 #endif 18 } 19 20 21 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 22 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 23 24 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 25 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 26 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->m; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 223 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 225 (*fact)->factor = FACTOR_LU; 226 (*fact)->assembled = PETSC_TRUE; 227 228 b = (Mat_SeqAIJ*)(*fact)->data; 229 ierr = PetscFree(b->imax);CHKERRQ(ierr); 230 b->sorted = PETSC_FALSE; 231 b->singlemalloc = PETSC_FALSE; 232 /* the next line frees the default space generated by the MatCreate() */ 233 ierr = PetscFree(b->a);CHKERRQ(ierr); 234 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 235 b->a = new_a; 236 b->j = new_j; 237 b->i = new_i; 238 b->ilen = 0; 239 b->imax = 0; 240 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 241 b->row = isirow; 242 b->col = iscolf; 243 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 244 b->maxnz = b->nz = new_i[n]; 245 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 246 (*fact)->info.factor_mallocs = 0; 247 248 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 249 250 /* check out for identical nodes. If found, use inode functions */ 251 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 252 253 af = ((double)b->nz)/((double)a->nz) + .001; 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 256 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 257 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 258 259 PetscFunctionReturn(0); 260 #endif 261 } 262 263 #undef __FUNCT__ 264 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 265 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 266 { 267 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 268 IS isicol; 269 PetscErrorCode ierr; 270 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 271 PetscInt *bi,*bj,*ajtmp; 272 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 273 PetscReal f; 274 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 275 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 276 PetscBT lnkbt; 277 278 PetscFunctionBegin; 279 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 280 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 281 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 282 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 283 284 /* get new row pointers */ 285 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 286 bi[0] = 0; 287 288 /* bdiag is location of diagonal in factor */ 289 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 290 bdiag[0] = 0; 291 292 /* linked list for storing column indices of the active row */ 293 nlnk = n + 1; 294 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 295 296 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 297 im = cols + n; 298 bi_ptr = (PetscInt**)(im + n); 299 300 /* initial FreeSpace size is f*(ai[n]+1) */ 301 f = info->fill; 302 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 303 current_space = free_space; 304 305 for (i=0; i<n; i++) { 306 /* copy previous fill into linked list */ 307 nzi = 0; 308 nnz = ai[r[i]+1] - ai[r[i]]; 309 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 310 ajtmp = aj + ai[r[i]]; 311 for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */ 312 ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 313 nzi += nlnk; 314 315 /* add pivot rows into linked list */ 316 row = lnk[n]; 317 while (row < i) { 318 nzbd = bdiag[row] - bi[row] + 1; 319 ajtmp = bi_ptr[row] + nzbd; 320 nnz = im[row] - nzbd; /* num of columns with row<indices<=i */ 321 im[row] = nzbd; 322 ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr); 323 nzi += nlnk; 324 im[row] += nzbd; /* update im[row]: num of cols with index<=i */ 325 326 row = lnk[row]; 327 } 328 329 bi[i+1] = bi[i] + nzi; 330 im[i] = nzi; 331 332 /* mark bdiag */ 333 nzbd = 0; 334 nnz = nzi; 335 k = lnk[n]; 336 while (nnz-- && k < i){ 337 nzbd++; 338 k = lnk[k]; 339 } 340 bdiag[i] = bi[i] + nzbd; 341 342 /* if free space is not available, make more free space */ 343 if (current_space->local_remaining<nzi) { 344 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 345 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 346 reallocs++; 347 } 348 349 /* copy data into free space, then initialize lnk */ 350 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 351 bi_ptr[i] = current_space->array; 352 current_space->array += nzi; 353 current_space->local_used += nzi; 354 current_space->local_remaining -= nzi; 355 } 356 if (ai[n] != 0) { 357 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 358 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 359 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 360 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 361 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 362 } else { 363 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 364 } 365 366 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 367 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 368 369 /* destroy list of free space and other temporary array(s) */ 370 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 371 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 372 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 373 ierr = PetscFree(cols);CHKERRQ(ierr); 374 375 /* put together the new matrix */ 376 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 377 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 378 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 379 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 380 b = (Mat_SeqAIJ*)(*B)->data; 381 ierr = PetscFree(b->imax);CHKERRQ(ierr); 382 b->singlemalloc = PETSC_FALSE; 383 /* the next line frees the default space generated by the Create() */ 384 ierr = PetscFree(b->a);CHKERRQ(ierr); 385 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 386 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 387 b->j = bj; 388 b->i = bi; 389 b->diag = bdiag; 390 b->ilen = 0; 391 b->imax = 0; 392 b->row = isrow; 393 b->col = iscol; 394 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 395 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 396 b->icol = isicol; 397 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 398 399 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 400 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 401 b->maxnz = b->nz = bi[n] ; 402 403 (*B)->factor = FACTOR_LU; 404 (*B)->info.factor_mallocs = reallocs; 405 (*B)->info.fill_ratio_given = f; 406 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 407 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 408 409 if (ai[n] != 0) { 410 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 411 } else { 412 (*B)->info.fill_ratio_needed = 0.0; 413 } 414 PetscFunctionReturn(0); 415 } 416 417 /* ----------------------------------------------------------- */ 418 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 422 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 423 { 424 Mat C=*B; 425 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 426 IS isrow = b->row,isicol = b->icol; 427 PetscErrorCode ierr; 428 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 429 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 430 PetscInt *diag_offset = b->diag,diag,*pj; 431 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 432 PetscReal zeropivot,rs,d,shift_nz; 433 PetscReal row_shift,shift_top=0.; 434 PetscTruth shift_pd; 435 LUShift_Ctx sctx; 436 PetscInt newshift; 437 438 PetscFunctionBegin; 439 shift_nz = info->shiftnz; 440 shift_pd = info->shiftpd; 441 zeropivot = info->zeropivot; 442 443 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 444 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 445 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 446 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 447 rtmps = rtmp; ics = ic; 448 449 if (!a->diag) { 450 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 451 } 452 /* if both shift schemes are chosen by user, only use shift_pd */ 453 if (shift_pd && shift_nz) shift_nz = 0.0; 454 if (shift_pd) { /* set shift_top=max{row_shift} */ 455 PetscInt *aai = a->i,*ddiag = a->diag; 456 shift_top = 0; 457 for (i=0; i<n; i++) { 458 d = PetscAbsScalar((a->a)[ddiag[i]]); 459 /* calculate amt of shift needed for this row */ 460 if (d<=0) { 461 row_shift = 0; 462 } else { 463 row_shift = -2*d; 464 } 465 v = a->a+aai[i]; 466 nz = aai[i+1] - aai[i]; 467 for (j=0; j<nz; j++) 468 row_shift += PetscAbsScalar(v[j]); 469 if (row_shift>shift_top) shift_top = row_shift; 470 } 471 } 472 473 sctx.shift_amount = 0; 474 sctx.shift_top = shift_top; 475 sctx.nshift = 0; 476 sctx.nshift_max = 5; 477 sctx.shift_lo = 0.; 478 sctx.shift_hi = 1.; 479 do { 480 sctx.lushift = PETSC_FALSE; 481 for (i=0; i<n; i++){ 482 nz = bi[i+1] - bi[i]; 483 bjtmp = bj + bi[i]; 484 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 485 486 /* load in initial (unfactored row) */ 487 nz = a->i[r[i]+1] - a->i[r[i]]; 488 ajtmp = a->j + a->i[r[i]]; 489 v = a->a + a->i[r[i]]; 490 for (j=0; j<nz; j++) { 491 rtmp[ics[ajtmp[j]]] = v[j]; 492 } 493 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 494 495 row = *bjtmp++; 496 while (row < i) { 497 pc = rtmp + row; 498 if (*pc != 0.0) { 499 pv = b->a + diag_offset[row]; 500 pj = b->j + diag_offset[row] + 1; 501 multiplier = *pc / *pv++; 502 *pc = multiplier; 503 nz = bi[row+1] - diag_offset[row] - 1; 504 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 505 PetscLogFlops(2*nz); 506 } 507 row = *bjtmp++; 508 } 509 /* finished row so stick it into b->a */ 510 pv = b->a + bi[i] ; 511 pj = b->j + bi[i] ; 512 nz = bi[i+1] - bi[i]; 513 diag = diag_offset[i] - bi[i]; 514 rs = 0.0; 515 for (j=0; j<nz; j++) { 516 pv[j] = rtmps[pj[j]]; 517 if (j != diag) rs += PetscAbsScalar(pv[j]); 518 } 519 520 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 521 sctx.rs = rs; 522 sctx.pv = pv[diag]; 523 ierr = Mat_LUCheckShift(info,&sctx,&newshift);CHKERRQ(ierr); 524 if (newshift == 1){ 525 break; /* sctx.shift_amount is updated */ 526 } else if (newshift == -1){ 527 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 528 } 529 } 530 531 if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 532 /* 533 * if no shift in this attempt & shifting & started shifting & can refine, 534 * then try lower shift 535 */ 536 sctx.shift_hi = info->shift_fraction; 537 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 538 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 539 sctx.lushift = PETSC_TRUE; 540 sctx.nshift++; 541 } 542 } while (sctx.lushift); 543 544 /* invert diagonal entries for simplier triangular solves */ 545 for (i=0; i<n; i++) { 546 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 547 } 548 549 ierr = PetscFree(rtmp);CHKERRQ(ierr); 550 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 551 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 552 C->factor = FACTOR_LU; 553 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 554 C->assembled = PETSC_TRUE; 555 PetscLogFlops(C->n); 556 if (sctx.nshift){ 557 if (shift_nz) { 558 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 559 } else if (shift_pd) { 560 b->lu_shift_fraction = info->shift_fraction; 561 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",info->shift_fraction,shift_top,sctx.nshift); 562 } 563 } 564 PetscFunctionReturn(0); 565 } 566 567 #undef __FUNCT__ 568 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 569 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 570 { 571 PetscFunctionBegin; 572 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 573 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 574 PetscFunctionReturn(0); 575 } 576 577 578 /* ----------------------------------------------------------- */ 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatLUFactor_SeqAIJ" 581 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 582 { 583 PetscErrorCode ierr; 584 Mat C; 585 586 PetscFunctionBegin; 587 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 588 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 589 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 590 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 591 PetscFunctionReturn(0); 592 } 593 /* ----------------------------------------------------------- */ 594 #undef __FUNCT__ 595 #define __FUNCT__ "MatSolve_SeqAIJ" 596 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 597 { 598 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 599 IS iscol = a->col,isrow = a->row; 600 PetscErrorCode ierr; 601 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 602 PetscInt nz,*rout,*cout; 603 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 604 605 PetscFunctionBegin; 606 if (!n) PetscFunctionReturn(0); 607 608 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 609 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 610 tmp = a->solve_work; 611 612 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 613 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 614 615 /* forward solve the lower triangular */ 616 tmp[0] = b[*r++]; 617 tmps = tmp; 618 for (i=1; i<n; i++) { 619 v = aa + ai[i] ; 620 vi = aj + ai[i] ; 621 nz = a->diag[i] - ai[i]; 622 sum = b[*r++]; 623 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 624 tmp[i] = sum; 625 } 626 627 /* backward solve the upper triangular */ 628 for (i=n-1; i>=0; i--){ 629 v = aa + a->diag[i] + 1; 630 vi = aj + a->diag[i] + 1; 631 nz = ai[i+1] - a->diag[i] - 1; 632 sum = tmp[i]; 633 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 634 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 635 } 636 637 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 638 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 639 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 640 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 641 PetscLogFlops(2*a->nz - A->n); 642 PetscFunctionReturn(0); 643 } 644 645 /* ----------------------------------------------------------- */ 646 #undef __FUNCT__ 647 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 648 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 649 { 650 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 651 PetscErrorCode ierr; 652 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 653 PetscScalar *x,*b,*aa = a->a; 654 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 655 PetscInt adiag_i,i,*vi,nz,ai_i; 656 PetscScalar *v,sum; 657 #endif 658 659 PetscFunctionBegin; 660 if (!n) PetscFunctionReturn(0); 661 662 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 663 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 664 665 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 666 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 667 #else 668 /* forward solve the lower triangular */ 669 x[0] = b[0]; 670 for (i=1; i<n; i++) { 671 ai_i = ai[i]; 672 v = aa + ai_i; 673 vi = aj + ai_i; 674 nz = adiag[i] - ai_i; 675 sum = b[i]; 676 while (nz--) sum -= *v++ * x[*vi++]; 677 x[i] = sum; 678 } 679 680 /* backward solve the upper triangular */ 681 for (i=n-1; i>=0; i--){ 682 adiag_i = adiag[i]; 683 v = aa + adiag_i + 1; 684 vi = aj + adiag_i + 1; 685 nz = ai[i+1] - adiag_i - 1; 686 sum = x[i]; 687 while (nz--) sum -= *v++ * x[*vi++]; 688 x[i] = sum*aa[adiag_i]; 689 } 690 #endif 691 PetscLogFlops(2*a->nz - A->n); 692 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 693 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 699 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 700 { 701 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 702 IS iscol = a->col,isrow = a->row; 703 PetscErrorCode ierr; 704 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 705 PetscInt nz,*rout,*cout; 706 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 707 708 PetscFunctionBegin; 709 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 710 711 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 712 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 713 tmp = a->solve_work; 714 715 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 716 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 717 718 /* forward solve the lower triangular */ 719 tmp[0] = b[*r++]; 720 for (i=1; i<n; i++) { 721 v = aa + ai[i] ; 722 vi = aj + ai[i] ; 723 nz = a->diag[i] - ai[i]; 724 sum = b[*r++]; 725 while (nz--) sum -= *v++ * tmp[*vi++ ]; 726 tmp[i] = sum; 727 } 728 729 /* backward solve the upper triangular */ 730 for (i=n-1; i>=0; i--){ 731 v = aa + a->diag[i] + 1; 732 vi = aj + a->diag[i] + 1; 733 nz = ai[i+1] - a->diag[i] - 1; 734 sum = tmp[i]; 735 while (nz--) sum -= *v++ * tmp[*vi++ ]; 736 tmp[i] = sum*aa[a->diag[i]]; 737 x[*c--] += tmp[i]; 738 } 739 740 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 741 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 742 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 743 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 744 PetscLogFlops(2*a->nz); 745 746 PetscFunctionReturn(0); 747 } 748 /* -------------------------------------------------------------------*/ 749 #undef __FUNCT__ 750 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 751 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 752 { 753 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 754 IS iscol = a->col,isrow = a->row; 755 PetscErrorCode ierr; 756 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 757 PetscInt nz,*rout,*cout,*diag = a->diag; 758 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 759 760 PetscFunctionBegin; 761 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 762 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 763 tmp = a->solve_work; 764 765 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 766 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 767 768 /* copy the b into temp work space according to permutation */ 769 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 770 771 /* forward solve the U^T */ 772 for (i=0; i<n; i++) { 773 v = aa + diag[i] ; 774 vi = aj + diag[i] + 1; 775 nz = ai[i+1] - diag[i] - 1; 776 s1 = tmp[i]; 777 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 778 while (nz--) { 779 tmp[*vi++ ] -= (*v++)*s1; 780 } 781 tmp[i] = s1; 782 } 783 784 /* backward solve the L^T */ 785 for (i=n-1; i>=0; i--){ 786 v = aa + diag[i] - 1 ; 787 vi = aj + diag[i] - 1 ; 788 nz = diag[i] - ai[i]; 789 s1 = tmp[i]; 790 while (nz--) { 791 tmp[*vi-- ] -= (*v--)*s1; 792 } 793 } 794 795 /* copy tmp into x according to permutation */ 796 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 797 798 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 799 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 800 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 801 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 802 803 PetscLogFlops(2*a->nz-A->n); 804 PetscFunctionReturn(0); 805 } 806 807 #undef __FUNCT__ 808 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 809 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 810 { 811 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 812 IS iscol = a->col,isrow = a->row; 813 PetscErrorCode ierr; 814 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 815 PetscInt nz,*rout,*cout,*diag = a->diag; 816 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 817 818 PetscFunctionBegin; 819 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 820 821 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 822 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 823 tmp = a->solve_work; 824 825 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 826 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 827 828 /* copy the b into temp work space according to permutation */ 829 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 830 831 /* forward solve the U^T */ 832 for (i=0; i<n; i++) { 833 v = aa + diag[i] ; 834 vi = aj + diag[i] + 1; 835 nz = ai[i+1] - diag[i] - 1; 836 tmp[i] *= *v++; 837 while (nz--) { 838 tmp[*vi++ ] -= (*v++)*tmp[i]; 839 } 840 } 841 842 /* backward solve the L^T */ 843 for (i=n-1; i>=0; i--){ 844 v = aa + diag[i] - 1 ; 845 vi = aj + diag[i] - 1 ; 846 nz = diag[i] - ai[i]; 847 while (nz--) { 848 tmp[*vi-- ] -= (*v--)*tmp[i]; 849 } 850 } 851 852 /* copy tmp into x according to permutation */ 853 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 854 855 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 856 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 857 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 858 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 859 860 PetscLogFlops(2*a->nz); 861 PetscFunctionReturn(0); 862 } 863 /* ----------------------------------------------------------------*/ 864 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 868 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 869 { 870 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 871 IS isicol; 872 PetscErrorCode ierr; 873 PetscInt *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j; 874 PetscInt *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 875 PetscInt *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0; 876 PetscInt incrlev,nnz,i,levels,diagonal_fill; 877 PetscTruth col_identity,row_identity; 878 PetscReal f; 879 880 PetscFunctionBegin; 881 f = info->fill; 882 levels = (PetscInt)info->levels; 883 diagonal_fill = (PetscInt)info->diagonal_fill; 884 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 885 886 /* special case that simply copies fill pattern */ 887 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 888 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 889 if (!levels && row_identity && col_identity) { 890 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 891 (*fact)->factor = FACTOR_LU; 892 b = (Mat_SeqAIJ*)(*fact)->data; 893 if (!b->diag) { 894 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 895 } 896 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 897 b->row = isrow; 898 b->col = iscol; 899 b->icol = isicol; 900 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 901 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 902 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 903 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } 906 907 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 908 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 909 910 /* get new row pointers */ 911 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 912 ainew[0] = 0; 913 /* don't know how many column pointers are needed so estimate */ 914 jmax = (PetscInt)(f*(ai[n]+1)); 915 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 916 /* ajfill is level of fill for each fill entry */ 917 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr); 918 /* fill is a linked list of nonzeros in active row */ 919 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 920 /* im is level for each filled value */ 921 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 922 /* dloc is location of diagonal in factor */ 923 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr); 924 dloc[0] = 0; 925 for (prow=0; prow<n; prow++) { 926 927 /* copy current row into linked list */ 928 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 929 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 930 xi = aj + ai[r[prow]]; 931 fill[n] = n; 932 fill[prow] = -1; /* marker to indicate if diagonal exists */ 933 while (nz--) { 934 fm = n; 935 idx = ic[*xi++ ]; 936 do { 937 m = fm; 938 fm = fill[m]; 939 } while (fm < idx); 940 fill[m] = idx; 941 fill[idx] = fm; 942 im[idx] = 0; 943 } 944 945 /* make sure diagonal entry is included */ 946 if (diagonal_fill && fill[prow] == -1) { 947 fm = n; 948 while (fill[fm] < prow) fm = fill[fm]; 949 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 950 fill[fm] = prow; 951 im[prow] = 0; 952 nzf++; 953 dcount++; 954 } 955 956 nzi = 0; 957 row = fill[n]; 958 while (row < prow) { 959 incrlev = im[row] + 1; 960 nz = dloc[row]; 961 xi = ajnew + ainew[row] + nz + 1; 962 flev = ajfill + ainew[row] + nz + 1; 963 nnz = ainew[row+1] - ainew[row] - nz - 1; 964 fm = row; 965 while (nnz-- > 0) { 966 idx = *xi++ ; 967 if (*flev + incrlev > levels) { 968 flev++; 969 continue; 970 } 971 do { 972 m = fm; 973 fm = fill[m]; 974 } while (fm < idx); 975 if (fm != idx) { 976 im[idx] = *flev + incrlev; 977 fill[m] = idx; 978 fill[idx] = fm; 979 fm = idx; 980 nzf++; 981 } else { 982 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 983 } 984 flev++; 985 } 986 row = fill[row]; 987 nzi++; 988 } 989 /* copy new filled row into permanent storage */ 990 ainew[prow+1] = ainew[prow] + nzf; 991 if (ainew[prow+1] > jmax) { 992 993 /* estimate how much additional space we will need */ 994 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 995 /* just double the memory each time */ 996 /* maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 997 PetscInt maxadd = jmax; 998 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 999 jmax += maxadd; 1000 1001 /* allocate a longer ajnew and ajfill */ 1002 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1003 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1004 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1005 ajnew = xi; 1006 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1007 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1008 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1009 ajfill = xi; 1010 reallocs++; /* count how many times we realloc */ 1011 } 1012 xi = ajnew + ainew[prow] ; 1013 flev = ajfill + ainew[prow] ; 1014 dloc[prow] = nzi; 1015 fm = fill[n]; 1016 while (nzf--) { 1017 *xi++ = fm ; 1018 *flev++ = im[fm]; 1019 fm = fill[fm]; 1020 } 1021 /* make sure row has diagonal entry */ 1022 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1023 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1024 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1025 } 1026 } 1027 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1028 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1029 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1030 ierr = PetscFree(fill);CHKERRQ(ierr); 1031 ierr = PetscFree(im);CHKERRQ(ierr); 1032 1033 { 1034 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1035 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1036 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1037 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1038 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1039 if (diagonal_fill) { 1040 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1041 } 1042 } 1043 1044 /* put together the new matrix */ 1045 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1046 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1047 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1048 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1049 b = (Mat_SeqAIJ*)(*fact)->data; 1050 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1051 b->singlemalloc = PETSC_FALSE; 1052 len = (ainew[n] )*sizeof(PetscScalar); 1053 /* the next line frees the default space generated by the Create() */ 1054 ierr = PetscFree(b->a);CHKERRQ(ierr); 1055 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1056 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1057 b->j = ajnew; 1058 b->i = ainew; 1059 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1060 b->diag = dloc; 1061 b->ilen = 0; 1062 b->imax = 0; 1063 b->row = isrow; 1064 b->col = iscol; 1065 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1066 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1067 b->icol = isicol; 1068 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1069 /* In b structure: Free imax, ilen, old a, old j. 1070 Allocate dloc, solve_work, new a, new j */ 1071 ierr = PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1072 b->maxnz = b->nz = ainew[n] ; 1073 (*fact)->factor = FACTOR_LU; 1074 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1075 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1076 (*fact)->info.factor_mallocs = reallocs; 1077 (*fact)->info.fill_ratio_given = f; 1078 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1079 PetscFunctionReturn(0); 1080 } 1081 1082 #include "src/mat/impls/sbaij/seq/sbaij.h" 1083 #undef __FUNCT__ 1084 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1085 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1086 { 1087 Mat C = *B; 1088 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1089 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1090 IS ip=b->row; 1091 PetscErrorCode ierr; 1092 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1093 PetscInt *ai=a->i,*aj=a->j; 1094 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1095 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1096 PetscReal zeropivot,rs,shiftnz; 1097 PetscTruth shiftpd; 1098 ChShift_Ctx sctx; 1099 PetscInt newshift; 1100 1101 PetscFunctionBegin; 1102 shiftnz = info->shiftnz; 1103 shiftpd = info->shiftpd; 1104 zeropivot = info->zeropivot; 1105 1106 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1107 1108 /* initialization */ 1109 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1110 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1111 jl = il + mbs; 1112 rtmp = (MatScalar*)(jl + mbs); 1113 1114 sctx.shift_amount = 0; 1115 sctx.nshift = 0; 1116 do { 1117 sctx.chshift = PETSC_FALSE; 1118 for (i=0; i<mbs; i++) { 1119 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1120 } 1121 1122 for (k = 0; k<mbs; k++){ 1123 bval = ba + bi[k]; 1124 /* initialize k-th row by the perm[k]-th row of A */ 1125 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1126 for (j = jmin; j < jmax; j++){ 1127 col = rip[aj[j]]; 1128 if (col >= k){ /* only take upper triangular entry */ 1129 rtmp[col] = aa[j]; 1130 *bval++ = 0.0; /* for in-place factorization */ 1131 } 1132 } 1133 /* shift the diagonal of the matrix */ 1134 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1135 1136 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1137 dk = rtmp[k]; 1138 i = jl[k]; /* first row to be added to k_th row */ 1139 1140 while (i < k){ 1141 nexti = jl[i]; /* next row to be added to k_th row */ 1142 1143 /* compute multiplier, update diag(k) and U(i,k) */ 1144 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1145 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1146 dk += uikdi*ba[ili]; 1147 ba[ili] = uikdi; /* -U(i,k) */ 1148 1149 /* add multiple of row i to k-th row */ 1150 jmin = ili + 1; jmax = bi[i+1]; 1151 if (jmin < jmax){ 1152 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1153 /* update il and jl for row i */ 1154 il[i] = jmin; 1155 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1156 } 1157 i = nexti; 1158 } 1159 1160 /* shift the diagonals when zero pivot is detected */ 1161 /* compute rs=sum of abs(off-diagonal) */ 1162 rs = 0.0; 1163 jmin = bi[k]+1; 1164 nz = bi[k+1] - jmin; 1165 if (nz){ 1166 bcol = bj + jmin; 1167 while (nz--){ 1168 rs += PetscAbsScalar(rtmp[*bcol]); 1169 bcol++; 1170 } 1171 } 1172 1173 sctx.rs = rs; 1174 sctx.pv = dk; 1175 ierr = Mat_CholeskyCheckShift(info,&sctx,&newshift);CHKERRQ(ierr); 1176 if (newshift == 1){ 1177 break; /* sctx.shift_amount is updated */ 1178 } else if (newshift == -1){ 1179 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1180 } 1181 1182 /* copy data into U(k,:) */ 1183 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1184 jmin = bi[k]+1; jmax = bi[k+1]; 1185 if (jmin < jmax) { 1186 for (j=jmin; j<jmax; j++){ 1187 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1188 } 1189 /* add the k-th row into il and jl */ 1190 il[k] = jmin; 1191 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1192 } 1193 } 1194 } while (sctx.chshift); 1195 ierr = PetscFree(il);CHKERRQ(ierr); 1196 1197 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1198 C->factor = FACTOR_CHOLESKY; 1199 C->assembled = PETSC_TRUE; 1200 C->preallocated = PETSC_TRUE; 1201 PetscLogFlops(C->m); 1202 if (sctx.nshift){ 1203 if (shiftnz) { 1204 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1205 } else if (shiftpd) { 1206 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1207 } 1208 } 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1214 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1215 { 1216 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1217 Mat_SeqSBAIJ *b; 1218 Mat B; 1219 PetscErrorCode ierr; 1220 PetscTruth perm_identity; 1221 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1222 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1223 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1224 PetscInt ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1225 PetscReal fill=info->fill,levels=info->levels; 1226 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1227 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1228 PetscBT lnkbt; 1229 1230 PetscFunctionBegin; 1231 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1232 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1233 1234 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1235 ui[0] = 0; 1236 1237 /* special case that simply copies fill pattern */ 1238 if (!levels && perm_identity) { 1239 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1240 for (i=0; i<am; i++) { 1241 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1242 } 1243 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1244 cols = uj; 1245 for (i=0; i<am; i++) { 1246 aj = a->j + a->diag[i]; 1247 ncols = ui[i+1] - ui[i]; 1248 for (j=0; j<ncols; j++) *cols++ = *aj++; 1249 } 1250 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1251 /* initialization */ 1252 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 1253 1254 /* jl: linked list for storing indices of the pivot rows 1255 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1256 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1257 il = jl + am; 1258 uj_ptr = (PetscInt**)(il + am); 1259 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1260 for (i=0; i<am; i++){ 1261 jl[i] = am; il[i] = 0; 1262 } 1263 1264 /* create and initialize a linked list for storing column indices of the active row k */ 1265 nlnk = am + 1; 1266 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1267 1268 /* initial FreeSpace size is fill*(ai[am]+1) */ 1269 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1270 current_space = free_space; 1271 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1272 current_space_lvl = free_space_lvl; 1273 1274 for (k=0; k<am; k++){ /* for each active row k */ 1275 /* initialize lnk by the column indices of row rip[k] of A */ 1276 nzk = 0; 1277 ncols = ai[rip[k]+1] - ai[rip[k]]; 1278 ncols_upper = 0; 1279 cols = cols_lvl + am; 1280 for (j=0; j<ncols; j++){ 1281 i = rip[*(aj + ai[rip[k]] + j)]; 1282 if (i >= k){ /* only take upper triangular entry */ 1283 cols[ncols_upper] = i; 1284 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 1285 ncols_upper++; 1286 } 1287 } 1288 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1289 nzk += nlnk; 1290 1291 /* update lnk by computing fill-in for each pivot row to be merged in */ 1292 prow = jl[k]; /* 1st pivot row */ 1293 1294 while (prow < k){ 1295 nextprow = jl[prow]; 1296 1297 /* merge prow into k-th row */ 1298 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1299 jmax = ui[prow+1]; 1300 ncols = jmax-jmin; 1301 i = jmin - ui[prow]; 1302 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1303 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1304 ierr = PetscIncompleteLLAddSorted(ncols,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1305 nzk += nlnk; 1306 1307 /* update il and jl for prow */ 1308 if (jmin < jmax){ 1309 il[prow] = jmin; 1310 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1311 } 1312 prow = nextprow; 1313 } 1314 1315 /* if free space is not available, make more free space */ 1316 if (current_space->local_remaining<nzk) { 1317 i = am - k + 1; /* num of unfactored rows */ 1318 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1319 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1320 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1321 reallocs++; 1322 } 1323 1324 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1325 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1326 1327 /* add the k-th row into il and jl */ 1328 if (nzk > 1){ 1329 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1330 jl[k] = jl[i]; jl[i] = k; 1331 il[k] = ui[k] + 1; 1332 } 1333 uj_ptr[k] = current_space->array; 1334 uj_lvl_ptr[k] = current_space_lvl->array; 1335 1336 current_space->array += nzk; 1337 current_space->local_used += nzk; 1338 current_space->local_remaining -= nzk; 1339 1340 current_space_lvl->array += nzk; 1341 current_space_lvl->local_used += nzk; 1342 current_space_lvl->local_remaining -= nzk; 1343 1344 ui[k+1] = ui[k] + nzk; 1345 } 1346 1347 if (ai[am] != 0) { 1348 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1349 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1350 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1351 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1352 } else { 1353 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1354 } 1355 1356 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1357 ierr = PetscFree(jl);CHKERRQ(ierr); 1358 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1359 1360 /* destroy list of free space and other temporary array(s) */ 1361 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1362 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1363 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1364 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1365 1366 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1367 1368 /* put together the new matrix in MATSEQSBAIJ format */ 1369 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1370 B = *fact; 1371 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1372 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1373 1374 b = (Mat_SeqSBAIJ*)B->data; 1375 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1376 b->singlemalloc = PETSC_FALSE; 1377 /* the next line frees the default space generated by the Create() */ 1378 ierr = PetscFree(b->a);CHKERRQ(ierr); 1379 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1380 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1381 b->j = uj; 1382 b->i = ui; 1383 b->diag = 0; 1384 b->ilen = 0; 1385 b->imax = 0; 1386 b->row = perm; 1387 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1388 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1389 b->icol = perm; 1390 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1391 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1392 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1393 b->maxnz = b->nz = ui[am]; 1394 1395 B->factor = FACTOR_CHOLESKY; 1396 B->info.factor_mallocs = reallocs; 1397 B->info.fill_ratio_given = fill; 1398 if (ai[am] != 0) { 1399 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1400 } else { 1401 B->info.fill_ratio_needed = 0.0; 1402 } 1403 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1404 if (perm_identity){ 1405 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1406 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1407 } 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1413 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1414 { 1415 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1416 Mat_SeqSBAIJ *b; 1417 Mat B; 1418 PetscErrorCode ierr; 1419 PetscTruth perm_identity; 1420 PetscReal fill = info->fill; 1421 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1422 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1423 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1424 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1425 PetscBT lnkbt; 1426 IS iperm; 1427 1428 PetscFunctionBegin; 1429 /* check whether perm is the identity mapping */ 1430 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1431 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1432 1433 if (!perm_identity){ 1434 /* check if perm is symmetric! */ 1435 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1436 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1437 for (i=0; i<am; i++) { 1438 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1439 } 1440 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1441 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1442 } 1443 1444 /* initialization */ 1445 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1446 ui[0] = 0; 1447 1448 /* jl: linked list for storing indices of the pivot rows 1449 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1450 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1451 il = jl + am; 1452 cols = il + am; 1453 ui_ptr = (PetscInt**)(cols + am); 1454 for (i=0; i<am; i++){ 1455 jl[i] = am; il[i] = 0; 1456 } 1457 1458 /* create and initialize a linked list for storing column indices of the active row k */ 1459 nlnk = am + 1; 1460 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1461 1462 /* initial FreeSpace size is fill*(ai[am]+1) */ 1463 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1464 current_space = free_space; 1465 1466 for (k=0; k<am; k++){ /* for each active row k */ 1467 /* initialize lnk by the column indices of row rip[k] of A */ 1468 nzk = 0; 1469 ncols = ai[rip[k]+1] - ai[rip[k]]; 1470 ncols_upper = 0; 1471 for (j=0; j<ncols; j++){ 1472 i = rip[*(aj + ai[rip[k]] + j)]; 1473 if (i >= k){ /* only take upper triangular entry */ 1474 cols[ncols_upper] = i; 1475 ncols_upper++; 1476 } 1477 } 1478 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1479 nzk += nlnk; 1480 1481 /* update lnk by computing fill-in for each pivot row to be merged in */ 1482 prow = jl[k]; /* 1st pivot row */ 1483 1484 while (prow < k){ 1485 nextprow = jl[prow]; 1486 /* merge prow into k-th row */ 1487 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1488 jmax = ui[prow+1]; 1489 ncols = jmax-jmin; 1490 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1491 ierr = PetscLLAddSorted(ncols,uj_ptr,prow,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1492 nzk += nlnk; 1493 1494 /* update il and jl for prow */ 1495 if (jmin < jmax){ 1496 il[prow] = jmin; 1497 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1498 } 1499 prow = nextprow; 1500 } 1501 1502 /* if free space is not available, make more free space */ 1503 if (current_space->local_remaining<nzk) { 1504 i = am - k + 1; /* num of unfactored rows */ 1505 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1506 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1507 reallocs++; 1508 } 1509 1510 /* copy data into free space, then initialize lnk */ 1511 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1512 1513 /* add the k-th row into il and jl */ 1514 if (nzk-1 > 0){ 1515 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1516 jl[k] = jl[i]; jl[i] = k; 1517 il[k] = ui[k] + 1; 1518 } 1519 ui_ptr[k] = current_space->array; 1520 current_space->array += nzk; 1521 current_space->local_used += nzk; 1522 current_space->local_remaining -= nzk; 1523 1524 ui[k+1] = ui[k] + nzk; 1525 } 1526 1527 if (ai[am] != 0) { 1528 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1529 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1530 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1531 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1532 } else { 1533 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1534 } 1535 1536 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1537 ierr = PetscFree(jl);CHKERRQ(ierr); 1538 1539 /* destroy list of free space and other temporary array(s) */ 1540 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1541 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1542 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1543 1544 /* put together the new matrix in MATSEQSBAIJ format */ 1545 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1546 B = *fact; 1547 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1548 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1549 1550 b = (Mat_SeqSBAIJ*)B->data; 1551 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1552 b->singlemalloc = PETSC_FALSE; 1553 /* the next line frees the default space generated by the Create() */ 1554 ierr = PetscFree(b->a);CHKERRQ(ierr); 1555 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1556 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1557 b->j = uj; 1558 b->i = ui; 1559 b->diag = 0; 1560 b->ilen = 0; 1561 b->imax = 0; 1562 b->row = perm; 1563 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1564 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1565 b->icol = perm; 1566 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1567 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1568 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1569 b->maxnz = b->nz = ui[am]; 1570 1571 B->factor = FACTOR_CHOLESKY; 1572 B->info.factor_mallocs = reallocs; 1573 B->info.fill_ratio_given = fill; 1574 if (ai[am] != 0) { 1575 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1576 } else { 1577 B->info.fill_ratio_needed = 0.0; 1578 } 1579 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1580 if (perm_identity){ 1581 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1582 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1583 } 1584 PetscFunctionReturn(0); 1585 } 1586