1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 #include "petscbt.h" 6 #include "src/mat/utils/freespace.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 10 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 11 { 12 PetscFunctionBegin; 13 14 SETERRQ(PETSC_ERR_SUP,"Code not written"); 15 #if !defined(PETSC_USE_DEBUG) 16 PetscFunctionReturn(0); 17 #endif 18 } 19 20 21 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 22 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 23 24 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 25 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 26 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->m; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 223 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 225 (*fact)->factor = FACTOR_LU; 226 (*fact)->assembled = PETSC_TRUE; 227 228 b = (Mat_SeqAIJ*)(*fact)->data; 229 ierr = PetscFree(b->imax);CHKERRQ(ierr); 230 b->sorted = PETSC_FALSE; 231 b->singlemalloc = PETSC_FALSE; 232 /* the next line frees the default space generated by the MatCreate() */ 233 ierr = PetscFree(b->a);CHKERRQ(ierr); 234 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 235 b->a = new_a; 236 b->j = new_j; 237 b->i = new_i; 238 b->ilen = 0; 239 b->imax = 0; 240 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 241 b->row = isirow; 242 b->col = iscolf; 243 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 244 b->maxnz = b->nz = new_i[n]; 245 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 246 (*fact)->info.factor_mallocs = 0; 247 248 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 249 250 /* check out for identical nodes. If found, use inode functions */ 251 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 252 253 af = ((double)b->nz)/((double)a->nz) + .001; 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 256 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 257 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 258 259 PetscFunctionReturn(0); 260 #endif 261 } 262 263 #undef __FUNCT__ 264 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 265 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 266 { 267 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 268 IS isicol; 269 PetscErrorCode ierr; 270 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 271 PetscInt *bi,*bj,*ajtmp; 272 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 273 PetscReal f; 274 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 275 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 276 PetscBT lnkbt; 277 278 PetscFunctionBegin; 279 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 280 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 281 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 282 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 283 284 /* get new row pointers */ 285 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 286 bi[0] = 0; 287 288 /* bdiag is location of diagonal in factor */ 289 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 290 bdiag[0] = 0; 291 292 /* linked list for storing column indices of the active row */ 293 nlnk = n + 1; 294 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 295 296 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 297 im = cols + n; 298 bi_ptr = (PetscInt**)(im + n); 299 300 /* initial FreeSpace size is f*(ai[n]+1) */ 301 f = info->fill; 302 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 303 current_space = free_space; 304 305 for (i=0; i<n; i++) { 306 /* copy previous fill into linked list */ 307 nzi = 0; 308 nnz = ai[r[i]+1] - ai[r[i]]; 309 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 310 ajtmp = aj + ai[r[i]]; 311 for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */ 312 ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 313 nzi += nlnk; 314 315 /* add pivot rows into linked list */ 316 row = lnk[n]; 317 while (row < i) { 318 nzbd = bdiag[row] - bi[row] + 1; 319 ajtmp = bi_ptr[row] + nzbd; 320 nnz = im[row] - nzbd; /* num of columns with row<indices<=i */ 321 im[row] = nzbd; 322 ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr); 323 nzi += nlnk; 324 im[row] += nzbd; /* update im[row]: num of cols with index<=i */ 325 326 row = lnk[row]; 327 } 328 329 bi[i+1] = bi[i] + nzi; 330 im[i] = nzi; 331 332 /* mark bdiag */ 333 nzbd = 0; 334 nnz = nzi; 335 k = lnk[n]; 336 while (nnz-- && k < i){ 337 nzbd++; 338 k = lnk[k]; 339 } 340 bdiag[i] = bi[i] + nzbd; 341 342 /* if free space is not available, make more free space */ 343 if (current_space->local_remaining<nzi) { 344 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 345 printf("reallocate %d. Previous space %d\n",nnz,(int)(f*ai[n])); 346 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 347 reallocs++; 348 } 349 350 /* copy data into free space, then initialize lnk */ 351 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 352 bi_ptr[i] = current_space->array; 353 current_space->array += nzi; 354 current_space->local_used += nzi; 355 current_space->local_remaining -= nzi; 356 } 357 if (ai[n] != 0) { 358 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 359 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 360 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 361 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 362 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 363 } else { 364 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 365 } 366 367 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 368 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 369 370 /* destroy list of free space and other temporary array(s) */ 371 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 372 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 373 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 374 ierr = PetscFree(cols);CHKERRQ(ierr); 375 376 /* put together the new matrix */ 377 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 378 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 379 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 380 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 381 b = (Mat_SeqAIJ*)(*B)->data; 382 ierr = PetscFree(b->imax);CHKERRQ(ierr); 383 b->singlemalloc = PETSC_FALSE; 384 /* the next line frees the default space generated by the Create() */ 385 ierr = PetscFree(b->a);CHKERRQ(ierr); 386 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 387 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 388 b->j = bj; 389 b->i = bi; 390 b->diag = bdiag; 391 b->ilen = 0; 392 b->imax = 0; 393 b->row = isrow; 394 b->col = iscol; 395 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 396 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 397 b->icol = isicol; 398 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 399 400 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 401 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 402 b->maxnz = b->nz = bi[n] ; 403 404 (*B)->factor = FACTOR_LU; 405 (*B)->info.factor_mallocs = reallocs; 406 (*B)->info.fill_ratio_given = f; 407 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 408 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 409 410 if (ai[n] != 0) { 411 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 412 } else { 413 (*B)->info.fill_ratio_needed = 0.0; 414 } 415 PetscFunctionReturn(0); 416 } 417 418 /* ----------------------------------------------------------- */ 419 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 423 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 424 { 425 Mat C=*B; 426 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 427 IS isrow = b->row,isicol = b->icol; 428 PetscErrorCode ierr; 429 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 430 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 431 PetscInt *diag_offset = b->diag,diag,*pj; 432 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 433 PetscReal zeropivot,rs,d,shift_nz; 434 PetscReal row_shift,shift_top=0.; 435 PetscTruth shift_pd; 436 LUShift_Ctx sctx; 437 PetscInt newshift; 438 439 PetscFunctionBegin; 440 shift_nz = info->shiftnz; 441 shift_pd = info->shiftpd; 442 zeropivot = info->zeropivot; 443 444 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 445 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 446 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 447 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 448 rtmps = rtmp; ics = ic; 449 450 if (!a->diag) { 451 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 452 } 453 /* if both shift schemes are chosen by user, only use shift_pd */ 454 if (shift_pd && shift_nz) shift_nz = 0.0; 455 if (shift_pd) { /* set shift_top=max{row_shift} */ 456 PetscInt *aai = a->i,*ddiag = a->diag; 457 shift_top = 0; 458 for (i=0; i<n; i++) { 459 d = PetscAbsScalar((a->a)[ddiag[i]]); 460 /* calculate amt of shift needed for this row */ 461 if (d<=0) { 462 row_shift = 0; 463 } else { 464 row_shift = -2*d; 465 } 466 v = a->a+aai[i]; 467 nz = aai[i+1] - aai[i]; 468 for (j=0; j<nz; j++) 469 row_shift += PetscAbsScalar(v[j]); 470 if (row_shift>shift_top) shift_top = row_shift; 471 } 472 } 473 474 sctx.shift_amount = 0; 475 sctx.shift_top = shift_top; 476 sctx.nshift = 0; 477 sctx.nshift_max = 5; 478 sctx.shift_lo = 0.; 479 sctx.shift_hi = 1.; 480 do { 481 sctx.lushift = PETSC_FALSE; 482 for (i=0; i<n; i++){ 483 nz = bi[i+1] - bi[i]; 484 bjtmp = bj + bi[i]; 485 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 486 487 /* load in initial (unfactored row) */ 488 nz = a->i[r[i]+1] - a->i[r[i]]; 489 ajtmp = a->j + a->i[r[i]]; 490 v = a->a + a->i[r[i]]; 491 for (j=0; j<nz; j++) { 492 rtmp[ics[ajtmp[j]]] = v[j]; 493 } 494 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 495 496 row = *bjtmp++; 497 while (row < i) { 498 pc = rtmp + row; 499 if (*pc != 0.0) { 500 pv = b->a + diag_offset[row]; 501 pj = b->j + diag_offset[row] + 1; 502 multiplier = *pc / *pv++; 503 *pc = multiplier; 504 nz = bi[row+1] - diag_offset[row] - 1; 505 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 506 PetscLogFlops(2*nz); 507 } 508 row = *bjtmp++; 509 } 510 /* finished row so stick it into b->a */ 511 pv = b->a + bi[i] ; 512 pj = b->j + bi[i] ; 513 nz = bi[i+1] - bi[i]; 514 diag = diag_offset[i] - bi[i]; 515 rs = 0.0; 516 for (j=0; j<nz; j++) { 517 pv[j] = rtmps[pj[j]]; 518 if (j != diag) rs += PetscAbsScalar(pv[j]); 519 } 520 521 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 522 sctx.rs = rs; 523 sctx.pv = pv[diag]; 524 ierr = Mat_LUCheckShift(info,&sctx,&newshift);CHKERRQ(ierr); 525 if (newshift == 1){ 526 break; /* sctx.shift_amount is updated */ 527 } else if (newshift == -1){ 528 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 529 } 530 } 531 532 if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 533 /* 534 * if no shift in this attempt & shifting & started shifting & can refine, 535 * then try lower shift 536 */ 537 sctx.shift_hi = info->shift_fraction; 538 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 539 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 540 sctx.lushift = PETSC_TRUE; 541 sctx.nshift++; 542 } 543 } while (sctx.lushift); 544 545 /* invert diagonal entries for simplier triangular solves */ 546 for (i=0; i<n; i++) { 547 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 548 } 549 550 ierr = PetscFree(rtmp);CHKERRQ(ierr); 551 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 552 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 553 C->factor = FACTOR_LU; 554 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 555 C->assembled = PETSC_TRUE; 556 PetscLogFlops(C->n); 557 if (sctx.nshift){ 558 if (shift_nz) { 559 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 560 } else if (shift_pd) { 561 b->lu_shift_fraction = info->shift_fraction; 562 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",info->shift_fraction,shift_top,sctx.nshift); 563 } 564 } 565 PetscFunctionReturn(0); 566 } 567 568 #undef __FUNCT__ 569 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 570 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 571 { 572 PetscFunctionBegin; 573 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 574 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 575 PetscFunctionReturn(0); 576 } 577 578 579 /* ----------------------------------------------------------- */ 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatLUFactor_SeqAIJ" 582 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 583 { 584 PetscErrorCode ierr; 585 Mat C; 586 587 PetscFunctionBegin; 588 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 589 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 590 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 591 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 592 PetscFunctionReturn(0); 593 } 594 /* ----------------------------------------------------------- */ 595 #undef __FUNCT__ 596 #define __FUNCT__ "MatSolve_SeqAIJ" 597 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 598 { 599 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 600 IS iscol = a->col,isrow = a->row; 601 PetscErrorCode ierr; 602 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 603 PetscInt nz,*rout,*cout; 604 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 605 606 PetscFunctionBegin; 607 if (!n) PetscFunctionReturn(0); 608 609 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 610 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 611 tmp = a->solve_work; 612 613 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 614 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 615 616 /* forward solve the lower triangular */ 617 tmp[0] = b[*r++]; 618 tmps = tmp; 619 for (i=1; i<n; i++) { 620 v = aa + ai[i] ; 621 vi = aj + ai[i] ; 622 nz = a->diag[i] - ai[i]; 623 sum = b[*r++]; 624 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 625 tmp[i] = sum; 626 } 627 628 /* backward solve the upper triangular */ 629 for (i=n-1; i>=0; i--){ 630 v = aa + a->diag[i] + 1; 631 vi = aj + a->diag[i] + 1; 632 nz = ai[i+1] - a->diag[i] - 1; 633 sum = tmp[i]; 634 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 635 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 636 } 637 638 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 639 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 640 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 641 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 642 PetscLogFlops(2*a->nz - A->n); 643 PetscFunctionReturn(0); 644 } 645 646 /* ----------------------------------------------------------- */ 647 #undef __FUNCT__ 648 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 649 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 650 { 651 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 652 PetscErrorCode ierr; 653 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 654 PetscScalar *x,*b,*aa = a->a; 655 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 656 PetscInt adiag_i,i,*vi,nz,ai_i; 657 PetscScalar *v,sum; 658 #endif 659 660 PetscFunctionBegin; 661 if (!n) PetscFunctionReturn(0); 662 663 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 664 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 665 666 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 667 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 668 #else 669 /* forward solve the lower triangular */ 670 x[0] = b[0]; 671 for (i=1; i<n; i++) { 672 ai_i = ai[i]; 673 v = aa + ai_i; 674 vi = aj + ai_i; 675 nz = adiag[i] - ai_i; 676 sum = b[i]; 677 while (nz--) sum -= *v++ * x[*vi++]; 678 x[i] = sum; 679 } 680 681 /* backward solve the upper triangular */ 682 for (i=n-1; i>=0; i--){ 683 adiag_i = adiag[i]; 684 v = aa + adiag_i + 1; 685 vi = aj + adiag_i + 1; 686 nz = ai[i+1] - adiag_i - 1; 687 sum = x[i]; 688 while (nz--) sum -= *v++ * x[*vi++]; 689 x[i] = sum*aa[adiag_i]; 690 } 691 #endif 692 PetscLogFlops(2*a->nz - A->n); 693 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 694 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 695 PetscFunctionReturn(0); 696 } 697 698 #undef __FUNCT__ 699 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 700 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 701 { 702 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 703 IS iscol = a->col,isrow = a->row; 704 PetscErrorCode ierr; 705 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 706 PetscInt nz,*rout,*cout; 707 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 708 709 PetscFunctionBegin; 710 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 711 712 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 713 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 714 tmp = a->solve_work; 715 716 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 717 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 718 719 /* forward solve the lower triangular */ 720 tmp[0] = b[*r++]; 721 for (i=1; i<n; i++) { 722 v = aa + ai[i] ; 723 vi = aj + ai[i] ; 724 nz = a->diag[i] - ai[i]; 725 sum = b[*r++]; 726 while (nz--) sum -= *v++ * tmp[*vi++ ]; 727 tmp[i] = sum; 728 } 729 730 /* backward solve the upper triangular */ 731 for (i=n-1; i>=0; i--){ 732 v = aa + a->diag[i] + 1; 733 vi = aj + a->diag[i] + 1; 734 nz = ai[i+1] - a->diag[i] - 1; 735 sum = tmp[i]; 736 while (nz--) sum -= *v++ * tmp[*vi++ ]; 737 tmp[i] = sum*aa[a->diag[i]]; 738 x[*c--] += tmp[i]; 739 } 740 741 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 742 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 743 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 744 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 745 PetscLogFlops(2*a->nz); 746 747 PetscFunctionReturn(0); 748 } 749 /* -------------------------------------------------------------------*/ 750 #undef __FUNCT__ 751 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 752 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 753 { 754 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 755 IS iscol = a->col,isrow = a->row; 756 PetscErrorCode ierr; 757 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 758 PetscInt nz,*rout,*cout,*diag = a->diag; 759 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 760 761 PetscFunctionBegin; 762 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 763 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 764 tmp = a->solve_work; 765 766 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 767 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 768 769 /* copy the b into temp work space according to permutation */ 770 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 771 772 /* forward solve the U^T */ 773 for (i=0; i<n; i++) { 774 v = aa + diag[i] ; 775 vi = aj + diag[i] + 1; 776 nz = ai[i+1] - diag[i] - 1; 777 s1 = tmp[i]; 778 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 779 while (nz--) { 780 tmp[*vi++ ] -= (*v++)*s1; 781 } 782 tmp[i] = s1; 783 } 784 785 /* backward solve the L^T */ 786 for (i=n-1; i>=0; i--){ 787 v = aa + diag[i] - 1 ; 788 vi = aj + diag[i] - 1 ; 789 nz = diag[i] - ai[i]; 790 s1 = tmp[i]; 791 while (nz--) { 792 tmp[*vi-- ] -= (*v--)*s1; 793 } 794 } 795 796 /* copy tmp into x according to permutation */ 797 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 798 799 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 800 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 801 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 802 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 803 804 PetscLogFlops(2*a->nz-A->n); 805 PetscFunctionReturn(0); 806 } 807 808 #undef __FUNCT__ 809 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 810 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 811 { 812 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 813 IS iscol = a->col,isrow = a->row; 814 PetscErrorCode ierr; 815 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 816 PetscInt nz,*rout,*cout,*diag = a->diag; 817 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 818 819 PetscFunctionBegin; 820 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 821 822 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 823 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 824 tmp = a->solve_work; 825 826 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 827 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 828 829 /* copy the b into temp work space according to permutation */ 830 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 831 832 /* forward solve the U^T */ 833 for (i=0; i<n; i++) { 834 v = aa + diag[i] ; 835 vi = aj + diag[i] + 1; 836 nz = ai[i+1] - diag[i] - 1; 837 tmp[i] *= *v++; 838 while (nz--) { 839 tmp[*vi++ ] -= (*v++)*tmp[i]; 840 } 841 } 842 843 /* backward solve the L^T */ 844 for (i=n-1; i>=0; i--){ 845 v = aa + diag[i] - 1 ; 846 vi = aj + diag[i] - 1 ; 847 nz = diag[i] - ai[i]; 848 while (nz--) { 849 tmp[*vi-- ] -= (*v--)*tmp[i]; 850 } 851 } 852 853 /* copy tmp into x according to permutation */ 854 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 855 856 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 857 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 858 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 859 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 860 861 PetscLogFlops(2*a->nz); 862 PetscFunctionReturn(0); 863 } 864 /* ----------------------------------------------------------------*/ 865 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 869 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 870 { 871 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 872 IS isicol; 873 PetscErrorCode ierr; 874 PetscInt *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j; 875 PetscInt *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 876 PetscInt *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0; 877 PetscInt incrlev,nnz,i,levels,diagonal_fill; 878 PetscTruth col_identity,row_identity; 879 PetscReal f; 880 881 PetscFunctionBegin; 882 f = info->fill; 883 levels = (PetscInt)info->levels; 884 diagonal_fill = (PetscInt)info->diagonal_fill; 885 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 886 887 /* special case that simply copies fill pattern */ 888 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 889 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 890 if (!levels && row_identity && col_identity) { 891 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 892 (*fact)->factor = FACTOR_LU; 893 b = (Mat_SeqAIJ*)(*fact)->data; 894 if (!b->diag) { 895 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 896 } 897 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 898 b->row = isrow; 899 b->col = iscol; 900 b->icol = isicol; 901 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 902 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 903 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 904 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 905 PetscFunctionReturn(0); 906 } 907 908 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 909 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 910 911 /* get new row pointers */ 912 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 913 ainew[0] = 0; 914 /* don't know how many column pointers are needed so estimate */ 915 jmax = (PetscInt)(f*(ai[n]+1)); 916 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 917 /* ajfill is level of fill for each fill entry */ 918 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr); 919 /* fill is a linked list of nonzeros in active row */ 920 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 921 /* im is level for each filled value */ 922 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 923 /* dloc is location of diagonal in factor */ 924 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr); 925 dloc[0] = 0; 926 for (prow=0; prow<n; prow++) { 927 928 /* copy current row into linked list */ 929 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 930 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 931 xi = aj + ai[r[prow]]; 932 fill[n] = n; 933 fill[prow] = -1; /* marker to indicate if diagonal exists */ 934 while (nz--) { 935 fm = n; 936 idx = ic[*xi++ ]; 937 do { 938 m = fm; 939 fm = fill[m]; 940 } while (fm < idx); 941 fill[m] = idx; 942 fill[idx] = fm; 943 im[idx] = 0; 944 } 945 946 /* make sure diagonal entry is included */ 947 if (diagonal_fill && fill[prow] == -1) { 948 fm = n; 949 while (fill[fm] < prow) fm = fill[fm]; 950 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 951 fill[fm] = prow; 952 im[prow] = 0; 953 nzf++; 954 dcount++; 955 } 956 957 nzi = 0; 958 row = fill[n]; 959 while (row < prow) { 960 incrlev = im[row] + 1; 961 nz = dloc[row]; 962 xi = ajnew + ainew[row] + nz + 1; 963 flev = ajfill + ainew[row] + nz + 1; 964 nnz = ainew[row+1] - ainew[row] - nz - 1; 965 fm = row; 966 while (nnz-- > 0) { 967 idx = *xi++ ; 968 if (*flev + incrlev > levels) { 969 flev++; 970 continue; 971 } 972 do { 973 m = fm; 974 fm = fill[m]; 975 } while (fm < idx); 976 if (fm != idx) { 977 im[idx] = *flev + incrlev; 978 fill[m] = idx; 979 fill[idx] = fm; 980 fm = idx; 981 nzf++; 982 } else { 983 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 984 } 985 flev++; 986 } 987 row = fill[row]; 988 nzi++; 989 } 990 /* copy new filled row into permanent storage */ 991 ainew[prow+1] = ainew[prow] + nzf; 992 if (ainew[prow+1] > jmax) { 993 994 /* estimate how much additional space we will need */ 995 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 996 /* just double the memory each time */ 997 /* maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 998 PetscInt maxadd = jmax; 999 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1000 jmax += maxadd; 1001 1002 /* allocate a longer ajnew and ajfill */ 1003 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1004 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1005 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1006 ajnew = xi; 1007 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1008 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1009 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1010 ajfill = xi; 1011 reallocs++; /* count how many times we realloc */ 1012 } 1013 xi = ajnew + ainew[prow] ; 1014 flev = ajfill + ainew[prow] ; 1015 dloc[prow] = nzi; 1016 fm = fill[n]; 1017 while (nzf--) { 1018 *xi++ = fm ; 1019 *flev++ = im[fm]; 1020 fm = fill[fm]; 1021 } 1022 /* make sure row has diagonal entry */ 1023 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1024 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1025 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1026 } 1027 } 1028 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1029 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1030 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1031 ierr = PetscFree(fill);CHKERRQ(ierr); 1032 ierr = PetscFree(im);CHKERRQ(ierr); 1033 1034 { 1035 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1036 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1037 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1038 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1039 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1040 if (diagonal_fill) { 1041 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1042 } 1043 } 1044 1045 /* put together the new matrix */ 1046 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1047 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1048 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1049 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1050 b = (Mat_SeqAIJ*)(*fact)->data; 1051 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1052 b->singlemalloc = PETSC_FALSE; 1053 len = (ainew[n] )*sizeof(PetscScalar); 1054 /* the next line frees the default space generated by the Create() */ 1055 ierr = PetscFree(b->a);CHKERRQ(ierr); 1056 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1057 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1058 b->j = ajnew; 1059 b->i = ainew; 1060 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1061 b->diag = dloc; 1062 b->ilen = 0; 1063 b->imax = 0; 1064 b->row = isrow; 1065 b->col = iscol; 1066 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1067 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1068 b->icol = isicol; 1069 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1070 /* In b structure: Free imax, ilen, old a, old j. 1071 Allocate dloc, solve_work, new a, new j */ 1072 ierr = PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1073 b->maxnz = b->nz = ainew[n] ; 1074 (*fact)->factor = FACTOR_LU; 1075 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1076 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1077 (*fact)->info.factor_mallocs = reallocs; 1078 (*fact)->info.fill_ratio_given = f; 1079 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1080 PetscFunctionReturn(0); 1081 } 1082 1083 #include "src/mat/impls/sbaij/seq/sbaij.h" 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1086 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1087 { 1088 Mat C = *B; 1089 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1090 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1091 IS ip=b->row; 1092 PetscErrorCode ierr; 1093 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1094 PetscInt *ai=a->i,*aj=a->j; 1095 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1096 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1097 PetscReal zeropivot,rs,shiftnz; 1098 PetscTruth shiftpd; 1099 ChShift_Ctx sctx; 1100 PetscInt newshift; 1101 1102 PetscFunctionBegin; 1103 shiftnz = info->shiftnz; 1104 shiftpd = info->shiftpd; 1105 zeropivot = info->zeropivot; 1106 1107 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1108 1109 /* initialization */ 1110 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1111 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1112 jl = il + mbs; 1113 rtmp = (MatScalar*)(jl + mbs); 1114 1115 sctx.shift_amount = 0; 1116 sctx.nshift = 0; 1117 do { 1118 sctx.chshift = PETSC_FALSE; 1119 for (i=0; i<mbs; i++) { 1120 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1121 } 1122 1123 for (k = 0; k<mbs; k++){ 1124 bval = ba + bi[k]; 1125 /* initialize k-th row by the perm[k]-th row of A */ 1126 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1127 for (j = jmin; j < jmax; j++){ 1128 col = rip[aj[j]]; 1129 if (col >= k){ /* only take upper triangular entry */ 1130 rtmp[col] = aa[j]; 1131 *bval++ = 0.0; /* for in-place factorization */ 1132 } 1133 } 1134 /* shift the diagonal of the matrix */ 1135 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1136 1137 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1138 dk = rtmp[k]; 1139 i = jl[k]; /* first row to be added to k_th row */ 1140 1141 while (i < k){ 1142 nexti = jl[i]; /* next row to be added to k_th row */ 1143 1144 /* compute multiplier, update diag(k) and U(i,k) */ 1145 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1146 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1147 dk += uikdi*ba[ili]; 1148 ba[ili] = uikdi; /* -U(i,k) */ 1149 1150 /* add multiple of row i to k-th row */ 1151 jmin = ili + 1; jmax = bi[i+1]; 1152 if (jmin < jmax){ 1153 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1154 /* update il and jl for row i */ 1155 il[i] = jmin; 1156 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1157 } 1158 i = nexti; 1159 } 1160 1161 /* shift the diagonals when zero pivot is detected */ 1162 /* compute rs=sum of abs(off-diagonal) */ 1163 rs = 0.0; 1164 jmin = bi[k]+1; 1165 nz = bi[k+1] - jmin; 1166 if (nz){ 1167 bcol = bj + jmin; 1168 while (nz--){ 1169 rs += PetscAbsScalar(rtmp[*bcol]); 1170 bcol++; 1171 } 1172 } 1173 1174 sctx.rs = rs; 1175 sctx.pv = dk; 1176 ierr = Mat_CholeskyCheckShift(info,&sctx,&newshift);CHKERRQ(ierr); 1177 if (newshift == 1){ 1178 break; /* sctx.shift_amount is updated */ 1179 } else if (newshift == -1){ 1180 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1181 } 1182 1183 /* copy data into U(k,:) */ 1184 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1185 jmin = bi[k]+1; jmax = bi[k+1]; 1186 if (jmin < jmax) { 1187 for (j=jmin; j<jmax; j++){ 1188 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1189 } 1190 /* add the k-th row into il and jl */ 1191 il[k] = jmin; 1192 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1193 } 1194 } 1195 } while (sctx.chshift); 1196 ierr = PetscFree(il);CHKERRQ(ierr); 1197 1198 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1199 C->factor = FACTOR_CHOLESKY; 1200 C->assembled = PETSC_TRUE; 1201 C->preallocated = PETSC_TRUE; 1202 PetscLogFlops(C->m); 1203 if (sctx.nshift){ 1204 if (shiftnz) { 1205 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1206 } else if (shiftpd) { 1207 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1208 } 1209 } 1210 PetscFunctionReturn(0); 1211 } 1212 1213 #undef __FUNCT__ 1214 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1215 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1216 { 1217 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1218 Mat_SeqSBAIJ *b; 1219 Mat B; 1220 PetscErrorCode ierr; 1221 PetscTruth perm_identity; 1222 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1223 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1224 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1225 PetscInt ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1226 PetscReal fill=info->fill,levels=info->levels; 1227 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1228 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1229 PetscBT lnkbt; 1230 1231 PetscFunctionBegin; 1232 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1233 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1234 1235 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1236 ui[0] = 0; 1237 1238 /* special case that simply copies fill pattern */ 1239 if (!levels && perm_identity) { 1240 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1241 for (i=0; i<am; i++) { 1242 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1243 } 1244 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1245 cols = uj; 1246 for (i=0; i<am; i++) { 1247 aj = a->j + a->diag[i]; 1248 ncols = ui[i+1] - ui[i]; 1249 for (j=0; j<ncols; j++) *cols++ = *aj++; 1250 } 1251 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1252 /* initialization */ 1253 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 1254 1255 /* jl: linked list for storing indices of the pivot rows 1256 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1257 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1258 il = jl + am; 1259 uj_ptr = (PetscInt**)(il + am); 1260 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1261 for (i=0; i<am; i++){ 1262 jl[i] = am; il[i] = 0; 1263 } 1264 1265 /* create and initialize a linked list for storing column indices of the active row k */ 1266 nlnk = am + 1; 1267 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1268 1269 /* initial FreeSpace size is fill*(ai[am]+1) */ 1270 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1271 current_space = free_space; 1272 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1273 current_space_lvl = free_space_lvl; 1274 1275 for (k=0; k<am; k++){ /* for each active row k */ 1276 /* initialize lnk by the column indices of row rip[k] of A */ 1277 nzk = 0; 1278 ncols = ai[rip[k]+1] - ai[rip[k]]; 1279 ncols_upper = 0; 1280 cols = cols_lvl + am; 1281 for (j=0; j<ncols; j++){ 1282 i = rip[*(aj + ai[rip[k]] + j)]; 1283 if (i >= k){ /* only take upper triangular entry */ 1284 cols[ncols_upper] = i; 1285 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 1286 ncols_upper++; 1287 } 1288 } 1289 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1290 nzk += nlnk; 1291 1292 /* update lnk by computing fill-in for each pivot row to be merged in */ 1293 prow = jl[k]; /* 1st pivot row */ 1294 1295 while (prow < k){ 1296 nextprow = jl[prow]; 1297 1298 /* merge prow into k-th row */ 1299 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1300 jmax = ui[prow+1]; 1301 ncols = jmax-jmin; 1302 i = jmin - ui[prow]; 1303 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1304 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1305 ierr = PetscIncompleteLLAddSorted(ncols,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1306 nzk += nlnk; 1307 1308 /* update il and jl for prow */ 1309 if (jmin < jmax){ 1310 il[prow] = jmin; 1311 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1312 } 1313 prow = nextprow; 1314 } 1315 1316 /* if free space is not available, make more free space */ 1317 if (current_space->local_remaining<nzk) { 1318 i = am - k + 1; /* num of unfactored rows */ 1319 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1320 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1321 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1322 reallocs++; 1323 } 1324 1325 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1326 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1327 1328 /* add the k-th row into il and jl */ 1329 if (nzk > 1){ 1330 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1331 jl[k] = jl[i]; jl[i] = k; 1332 il[k] = ui[k] + 1; 1333 } 1334 uj_ptr[k] = current_space->array; 1335 uj_lvl_ptr[k] = current_space_lvl->array; 1336 1337 current_space->array += nzk; 1338 current_space->local_used += nzk; 1339 current_space->local_remaining -= nzk; 1340 1341 current_space_lvl->array += nzk; 1342 current_space_lvl->local_used += nzk; 1343 current_space_lvl->local_remaining -= nzk; 1344 1345 ui[k+1] = ui[k] + nzk; 1346 } 1347 1348 if (ai[am] != 0) { 1349 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1350 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1351 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1352 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1353 } else { 1354 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1355 } 1356 1357 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1358 ierr = PetscFree(jl);CHKERRQ(ierr); 1359 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1360 1361 /* destroy list of free space and other temporary array(s) */ 1362 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1363 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1364 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1365 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1366 1367 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1368 1369 /* put together the new matrix in MATSEQSBAIJ format */ 1370 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1371 B = *fact; 1372 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1373 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1374 1375 b = (Mat_SeqSBAIJ*)B->data; 1376 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1377 b->singlemalloc = PETSC_FALSE; 1378 /* the next line frees the default space generated by the Create() */ 1379 ierr = PetscFree(b->a);CHKERRQ(ierr); 1380 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1381 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1382 b->j = uj; 1383 b->i = ui; 1384 b->diag = 0; 1385 b->ilen = 0; 1386 b->imax = 0; 1387 b->row = perm; 1388 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1389 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1390 b->icol = perm; 1391 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1392 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1393 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1394 b->maxnz = b->nz = ui[am]; 1395 1396 B->factor = FACTOR_CHOLESKY; 1397 B->info.factor_mallocs = reallocs; 1398 B->info.fill_ratio_given = fill; 1399 if (ai[am] != 0) { 1400 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1401 } else { 1402 B->info.fill_ratio_needed = 0.0; 1403 } 1404 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1405 if (perm_identity){ 1406 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1407 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1408 } 1409 PetscFunctionReturn(0); 1410 } 1411 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1414 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1415 { 1416 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1417 Mat_SeqSBAIJ *b; 1418 Mat B; 1419 PetscErrorCode ierr; 1420 PetscTruth perm_identity; 1421 PetscReal fill = info->fill; 1422 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1423 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1424 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1425 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1426 PetscBT lnkbt; 1427 IS iperm; 1428 1429 PetscFunctionBegin; 1430 /* check whether perm is the identity mapping */ 1431 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1432 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1433 1434 if (!perm_identity){ 1435 /* check if perm is symmetric! */ 1436 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1437 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1438 for (i=0; i<am; i++) { 1439 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1440 } 1441 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1442 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1443 } 1444 1445 /* initialization */ 1446 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1447 ui[0] = 0; 1448 1449 /* jl: linked list for storing indices of the pivot rows 1450 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1451 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1452 il = jl + am; 1453 cols = il + am; 1454 ui_ptr = (PetscInt**)(cols + am); 1455 for (i=0; i<am; i++){ 1456 jl[i] = am; il[i] = 0; 1457 } 1458 1459 /* create and initialize a linked list for storing column indices of the active row k */ 1460 nlnk = am + 1; 1461 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1462 1463 /* initial FreeSpace size is fill*(ai[am]+1) */ 1464 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1465 current_space = free_space; 1466 1467 for (k=0; k<am; k++){ /* for each active row k */ 1468 /* initialize lnk by the column indices of row rip[k] of A */ 1469 nzk = 0; 1470 ncols = ai[rip[k]+1] - ai[rip[k]]; 1471 ncols_upper = 0; 1472 for (j=0; j<ncols; j++){ 1473 i = rip[*(aj + ai[rip[k]] + j)]; 1474 if (i >= k){ /* only take upper triangular entry */ 1475 cols[ncols_upper] = i; 1476 ncols_upper++; 1477 } 1478 } 1479 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1480 nzk += nlnk; 1481 1482 /* update lnk by computing fill-in for each pivot row to be merged in */ 1483 prow = jl[k]; /* 1st pivot row */ 1484 1485 while (prow < k){ 1486 nextprow = jl[prow]; 1487 /* merge prow into k-th row */ 1488 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1489 jmax = ui[prow+1]; 1490 ncols = jmax-jmin; 1491 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1492 ierr = PetscLLAddSorted(ncols,uj_ptr,prow,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1493 nzk += nlnk; 1494 1495 /* update il and jl for prow */ 1496 if (jmin < jmax){ 1497 il[prow] = jmin; 1498 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1499 } 1500 prow = nextprow; 1501 } 1502 1503 /* if free space is not available, make more free space */ 1504 if (current_space->local_remaining<nzk) { 1505 i = am - k + 1; /* num of unfactored rows */ 1506 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1507 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1508 reallocs++; 1509 } 1510 1511 /* copy data into free space, then initialize lnk */ 1512 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1513 1514 /* add the k-th row into il and jl */ 1515 if (nzk-1 > 0){ 1516 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1517 jl[k] = jl[i]; jl[i] = k; 1518 il[k] = ui[k] + 1; 1519 } 1520 ui_ptr[k] = current_space->array; 1521 current_space->array += nzk; 1522 current_space->local_used += nzk; 1523 current_space->local_remaining -= nzk; 1524 1525 ui[k+1] = ui[k] + nzk; 1526 } 1527 1528 if (ai[am] != 0) { 1529 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1530 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1531 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1532 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1533 } else { 1534 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1535 } 1536 1537 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1538 ierr = PetscFree(jl);CHKERRQ(ierr); 1539 1540 /* destroy list of free space and other temporary array(s) */ 1541 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1542 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1543 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1544 1545 /* put together the new matrix in MATSEQSBAIJ format */ 1546 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1547 B = *fact; 1548 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1549 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1550 1551 b = (Mat_SeqSBAIJ*)B->data; 1552 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1553 b->singlemalloc = PETSC_FALSE; 1554 /* the next line frees the default space generated by the Create() */ 1555 ierr = PetscFree(b->a);CHKERRQ(ierr); 1556 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1557 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1558 b->j = uj; 1559 b->i = ui; 1560 b->diag = 0; 1561 b->ilen = 0; 1562 b->imax = 0; 1563 b->row = perm; 1564 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1565 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1566 b->icol = perm; 1567 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1568 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1569 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1570 b->maxnz = b->nz = ui[am]; 1571 1572 B->factor = FACTOR_CHOLESKY; 1573 B->info.factor_mallocs = reallocs; 1574 B->info.fill_ratio_given = fill; 1575 if (ai[am] != 0) { 1576 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1577 } else { 1578 B->info.fill_ratio_needed = 0.0; 1579 } 1580 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1581 if (perm_identity){ 1582 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1583 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1584 } 1585 PetscFunctionReturn(0); 1586 } 1587