1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 #include "petscbt.h" 6 #include "src/mat/utils/freespace.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 10 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 11 { 12 PetscFunctionBegin; 13 14 SETERRQ(PETSC_ERR_SUP,"Code not written"); 15 #if !defined(PETSC_USE_DEBUG) 16 PetscFunctionReturn(0); 17 #endif 18 } 19 20 21 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 22 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 26 27 #undef __FUNCT__ 28 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 29 /* ------------------------------------------------------------ 30 31 This interface was contribed by Tony Caola 32 33 This routine is an interface to the pivoting drop-tolerance 34 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 35 SPARSEKIT2. 36 37 The SPARSEKIT2 routines used here are covered by the GNU 38 copyright; see the file gnu in this directory. 39 40 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 41 help in getting this routine ironed out. 42 43 The major drawback to this routine is that if info->fill is 44 not large enough it fails rather than allocating more space; 45 this can be fixed by hacking/improving the f2c version of 46 Yousef Saad's code. 47 48 ------------------------------------------------------------ 49 */ 50 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 51 { 52 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 53 PetscFunctionBegin; 54 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 55 You can obtain the drop tolerance routines by installing PETSc from\n\ 56 www.mcs.anl.gov/petsc\n"); 57 #else 58 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 59 IS iscolf,isicol,isirow; 60 PetscTruth reorder; 61 PetscErrorCode ierr,sierr; 62 PetscInt *c,*r,*ic,i,n = A->m; 63 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 64 PetscInt *ordcol,*iwk,*iperm,*jw; 65 PetscInt jmax,lfill,job,*o_i,*o_j; 66 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 67 PetscReal af; 68 69 PetscFunctionBegin; 70 71 if (info->dt == PETSC_DEFAULT) info->dt = .005; 72 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 73 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 74 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 75 lfill = (PetscInt)(info->dtcount/2.0); 76 jmax = (PetscInt)(info->fill*a->nz); 77 78 79 /* ------------------------------------------------------------ 80 If reorder=.TRUE., then the original matrix has to be 81 reordered to reflect the user selected ordering scheme, and 82 then de-reordered so it is in it's original format. 83 Because Saad's dperm() is NOT in place, we have to copy 84 the original matrix and allocate more storage. . . 85 ------------------------------------------------------------ 86 */ 87 88 /* set reorder to true if either isrow or iscol is not identity */ 89 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 90 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 91 reorder = PetscNot(reorder); 92 93 94 /* storage for ilu factor */ 95 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 98 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 99 100 /* ------------------------------------------------------------ 101 Make sure that everything is Fortran formatted (1-Based) 102 ------------------------------------------------------------ 103 */ 104 for (i=old_i[0];i<old_i[n];i++) { 105 old_j[i]++; 106 } 107 for(i=0;i<n+1;i++) { 108 old_i[i]++; 109 }; 110 111 112 if (reorder) { 113 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 114 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 115 for(i=0;i<n;i++) { 116 r[i] = r[i]+1; 117 c[i] = c[i]+1; 118 } 119 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 122 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 123 for (i=0;i<n;i++) { 124 r[i] = r[i]-1; 125 c[i] = c[i]-1; 126 } 127 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 128 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 148 if (sierr) { 149 switch (sierr) { 150 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 152 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 155 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 222 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 223 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 224 (*fact)->factor = FACTOR_LU; 225 (*fact)->assembled = PETSC_TRUE; 226 227 b = (Mat_SeqAIJ*)(*fact)->data; 228 ierr = PetscFree(b->imax);CHKERRQ(ierr); 229 b->sorted = PETSC_FALSE; 230 b->singlemalloc = PETSC_FALSE; 231 /* the next line frees the default space generated by the MatCreate() */ 232 ierr = PetscFree(b->a);CHKERRQ(ierr); 233 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 234 b->a = new_a; 235 b->j = new_j; 236 b->i = new_i; 237 b->ilen = 0; 238 b->imax = 0; 239 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 240 b->row = isirow; 241 b->col = iscolf; 242 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 243 b->maxnz = b->nz = new_i[n]; 244 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 245 (*fact)->info.factor_mallocs = 0; 246 247 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 248 249 af = ((double)b->nz)/((double)a->nz) + .001; 250 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 251 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 254 255 ierr = MatILUDTFactor_Inode(A,info,isrow,iscol,fact);CHKERRQ(ierr); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 #undef __FUNCT__ 262 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 263 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 264 { 265 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 266 IS isicol; 267 PetscErrorCode ierr; 268 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 269 PetscInt *bi,*bj,*ajtmp; 270 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 271 PetscReal f; 272 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 273 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 274 PetscBT lnkbt; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 284 bi[0] = 0; 285 286 /* bdiag is location of diagonal in factor */ 287 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 288 bdiag[0] = 0; 289 290 /* linked list for storing column indices of the active row */ 291 nlnk = n + 1; 292 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 293 294 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 295 im = cols + n; 296 bi_ptr = (PetscInt**)(im + n); 297 298 /* initial FreeSpace size is f*(ai[n]+1) */ 299 f = info->fill; 300 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 301 current_space = free_space; 302 303 for (i=0; i<n; i++) { 304 /* copy previous fill into linked list */ 305 nzi = 0; 306 nnz = ai[r[i]+1] - ai[r[i]]; 307 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 308 ajtmp = aj + ai[r[i]]; 309 for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */ 310 ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 311 nzi += nlnk; 312 313 /* add pivot rows into linked list */ 314 row = lnk[n]; 315 while (row < i) { 316 nzbd = bdiag[row] - bi[row] + 1; 317 ajtmp = bi_ptr[row] + nzbd; 318 nnz = im[row] - nzbd; /* num of columns with row<indices<=i */ 319 im[row] = nzbd; 320 ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr); 321 nzi += nlnk; 322 im[row] += nzbd; /* update im[row]: num of cols with index<=i */ 323 324 row = lnk[row]; 325 } 326 327 bi[i+1] = bi[i] + nzi; 328 im[i] = nzi; 329 330 /* mark bdiag */ 331 nzbd = 0; 332 nnz = nzi; 333 k = lnk[n]; 334 while (nnz-- && k < i){ 335 nzbd++; 336 k = lnk[k]; 337 } 338 bdiag[i] = bi[i] + nzbd; 339 340 /* if free space is not available, make more free space */ 341 if (current_space->local_remaining<nzi) { 342 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 343 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 344 reallocs++; 345 } 346 347 /* copy data into free space, then initialize lnk */ 348 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 349 bi_ptr[i] = current_space->array; 350 current_space->array += nzi; 351 current_space->local_used += nzi; 352 current_space->local_remaining -= nzi; 353 } 354 if (ai[n] != 0) { 355 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 356 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 357 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 358 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 359 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 360 } else { 361 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 362 } 363 364 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 365 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 366 367 /* destroy list of free space and other temporary array(s) */ 368 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 369 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 370 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 371 ierr = PetscFree(cols);CHKERRQ(ierr); 372 373 /* put together the new matrix */ 374 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 375 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 376 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 377 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 378 b = (Mat_SeqAIJ*)(*B)->data; 379 ierr = PetscFree(b->imax);CHKERRQ(ierr); 380 b->singlemalloc = PETSC_FALSE; 381 /* the next line frees the default space generated by the Create() */ 382 ierr = PetscFree(b->a);CHKERRQ(ierr); 383 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 384 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 385 b->j = bj; 386 b->i = bi; 387 b->diag = bdiag; 388 b->ilen = 0; 389 b->imax = 0; 390 b->row = isrow; 391 b->col = iscol; 392 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 393 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 394 b->icol = isicol; 395 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 396 397 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 398 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 399 b->maxnz = b->nz = bi[n] ; 400 401 (*B)->factor = FACTOR_LU; 402 (*B)->info.factor_mallocs = reallocs; 403 (*B)->info.fill_ratio_given = f; 404 405 if (ai[n] != 0) { 406 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 407 } else { 408 (*B)->info.fill_ratio_needed = 0.0; 409 } 410 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 411 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 412 PetscFunctionReturn(0); 413 } 414 415 /* ----------------------------------------------------------- */ 416 #undef __FUNCT__ 417 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 418 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 419 { 420 Mat C=*B; 421 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 422 IS isrow = b->row,isicol = b->icol; 423 PetscErrorCode ierr; 424 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 425 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 426 PetscInt *diag_offset = b->diag,diag,*pj; 427 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 428 PetscReal zeropivot,rs,d,shift_nz; 429 PetscReal row_shift,shift_top=0.; 430 PetscTruth shift_pd; 431 LUShift_Ctx sctx; 432 PetscInt newshift; 433 434 PetscFunctionBegin; 435 shift_nz = info->shiftnz; 436 shift_pd = info->shiftpd; 437 zeropivot = info->zeropivot; 438 439 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 440 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 441 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 442 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 443 rtmps = rtmp; ics = ic; 444 445 if (!a->diag) { 446 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 447 } 448 /* if both shift schemes are chosen by user, only use shift_pd */ 449 if (shift_pd && shift_nz) shift_nz = 0.0; 450 if (shift_pd) { /* set shift_top=max{row_shift} */ 451 PetscInt *aai = a->i,*ddiag = a->diag; 452 shift_top = 0; 453 for (i=0; i<n; i++) { 454 d = PetscAbsScalar((a->a)[ddiag[i]]); 455 /* calculate amt of shift needed for this row */ 456 if (d<=0) { 457 row_shift = 0; 458 } else { 459 row_shift = -2*d; 460 } 461 v = a->a+aai[i]; 462 nz = aai[i+1] - aai[i]; 463 for (j=0; j<nz; j++) 464 row_shift += PetscAbsScalar(v[j]); 465 if (row_shift>shift_top) shift_top = row_shift; 466 } 467 } 468 469 sctx.shift_amount = 0; 470 sctx.shift_top = shift_top; 471 sctx.nshift = 0; 472 sctx.nshift_max = 5; 473 sctx.shift_lo = 0.; 474 sctx.shift_hi = 1.; 475 do { 476 sctx.lushift = PETSC_FALSE; 477 for (i=0; i<n; i++){ 478 nz = bi[i+1] - bi[i]; 479 bjtmp = bj + bi[i]; 480 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 481 482 /* load in initial (unfactored row) */ 483 nz = a->i[r[i]+1] - a->i[r[i]]; 484 ajtmp = a->j + a->i[r[i]]; 485 v = a->a + a->i[r[i]]; 486 for (j=0; j<nz; j++) { 487 rtmp[ics[ajtmp[j]]] = v[j]; 488 } 489 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 490 491 row = *bjtmp++; 492 while (row < i) { 493 pc = rtmp + row; 494 if (*pc != 0.0) { 495 pv = b->a + diag_offset[row]; 496 pj = b->j + diag_offset[row] + 1; 497 multiplier = *pc / *pv++; 498 *pc = multiplier; 499 nz = bi[row+1] - diag_offset[row] - 1; 500 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 501 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 502 } 503 row = *bjtmp++; 504 } 505 /* finished row so stick it into b->a */ 506 pv = b->a + bi[i] ; 507 pj = b->j + bi[i] ; 508 nz = bi[i+1] - bi[i]; 509 diag = diag_offset[i] - bi[i]; 510 rs = 0.0; 511 for (j=0; j<nz; j++) { 512 pv[j] = rtmps[pj[j]]; 513 if (j != diag) rs += PetscAbsScalar(pv[j]); 514 } 515 516 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 517 sctx.rs = rs; 518 sctx.pv = pv[diag]; 519 ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 520 if (newshift == 1){ 521 break; /* sctx.shift_amount is updated */ 522 } else if (newshift == -1){ 523 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 524 } 525 } 526 527 if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 528 /* 529 * if no shift in this attempt & shifting & started shifting & can refine, 530 * then try lower shift 531 */ 532 sctx.shift_hi = info->shift_fraction; 533 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 534 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 535 sctx.lushift = PETSC_TRUE; 536 sctx.nshift++; 537 } 538 } while (sctx.lushift); 539 540 /* invert diagonal entries for simplier triangular solves */ 541 for (i=0; i<n; i++) { 542 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 543 } 544 545 ierr = PetscFree(rtmp);CHKERRQ(ierr); 546 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 547 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 548 C->factor = FACTOR_LU; 549 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 550 C->assembled = PETSC_TRUE; 551 ierr = PetscLogFlops(C->n);CHKERRQ(ierr); 552 if (sctx.nshift){ 553 if (shift_nz) { 554 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 555 } else if (shift_pd) { 556 b->lu_shift_fraction = info->shift_fraction; 557 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",info->shift_fraction,shift_top,sctx.nshift); 558 } 559 } 560 PetscFunctionReturn(0); 561 } 562 563 #undef __FUNCT__ 564 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 565 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 566 { 567 PetscFunctionBegin; 568 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 569 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 570 PetscFunctionReturn(0); 571 } 572 573 574 /* ----------------------------------------------------------- */ 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatLUFactor_SeqAIJ" 577 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 578 { 579 PetscErrorCode ierr; 580 Mat C; 581 582 PetscFunctionBegin; 583 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 584 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 585 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 586 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 587 PetscFunctionReturn(0); 588 } 589 /* ----------------------------------------------------------- */ 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatSolve_SeqAIJ" 592 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 593 { 594 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 595 IS iscol = a->col,isrow = a->row; 596 PetscErrorCode ierr; 597 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 598 PetscInt nz,*rout,*cout; 599 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 600 601 PetscFunctionBegin; 602 if (!n) PetscFunctionReturn(0); 603 604 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 605 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 606 tmp = a->solve_work; 607 608 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 609 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 610 611 /* forward solve the lower triangular */ 612 tmp[0] = b[*r++]; 613 tmps = tmp; 614 for (i=1; i<n; i++) { 615 v = aa + ai[i] ; 616 vi = aj + ai[i] ; 617 nz = a->diag[i] - ai[i]; 618 sum = b[*r++]; 619 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 620 tmp[i] = sum; 621 } 622 623 /* backward solve the upper triangular */ 624 for (i=n-1; i>=0; i--){ 625 v = aa + a->diag[i] + 1; 626 vi = aj + a->diag[i] + 1; 627 nz = ai[i+1] - a->diag[i] - 1; 628 sum = tmp[i]; 629 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 630 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 631 } 632 633 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 634 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 635 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 636 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 637 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 638 PetscFunctionReturn(0); 639 } 640 641 /* ----------------------------------------------------------- */ 642 #undef __FUNCT__ 643 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 644 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 645 { 646 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 647 PetscErrorCode ierr; 648 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 649 PetscScalar *x,*b,*aa = a->a; 650 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 651 PetscInt adiag_i,i,*vi,nz,ai_i; 652 PetscScalar *v,sum; 653 #endif 654 655 PetscFunctionBegin; 656 if (!n) PetscFunctionReturn(0); 657 658 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 659 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 660 661 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 662 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 663 #else 664 /* forward solve the lower triangular */ 665 x[0] = b[0]; 666 for (i=1; i<n; i++) { 667 ai_i = ai[i]; 668 v = aa + ai_i; 669 vi = aj + ai_i; 670 nz = adiag[i] - ai_i; 671 sum = b[i]; 672 while (nz--) sum -= *v++ * x[*vi++]; 673 x[i] = sum; 674 } 675 676 /* backward solve the upper triangular */ 677 for (i=n-1; i>=0; i--){ 678 adiag_i = adiag[i]; 679 v = aa + adiag_i + 1; 680 vi = aj + adiag_i + 1; 681 nz = ai[i+1] - adiag_i - 1; 682 sum = x[i]; 683 while (nz--) sum -= *v++ * x[*vi++]; 684 x[i] = sum*aa[adiag_i]; 685 } 686 #endif 687 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 688 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 689 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 690 PetscFunctionReturn(0); 691 } 692 693 #undef __FUNCT__ 694 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 695 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 696 { 697 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 698 IS iscol = a->col,isrow = a->row; 699 PetscErrorCode ierr; 700 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 701 PetscInt nz,*rout,*cout; 702 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 703 704 PetscFunctionBegin; 705 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 706 707 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 708 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 709 tmp = a->solve_work; 710 711 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 712 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 713 714 /* forward solve the lower triangular */ 715 tmp[0] = b[*r++]; 716 for (i=1; i<n; i++) { 717 v = aa + ai[i] ; 718 vi = aj + ai[i] ; 719 nz = a->diag[i] - ai[i]; 720 sum = b[*r++]; 721 while (nz--) sum -= *v++ * tmp[*vi++ ]; 722 tmp[i] = sum; 723 } 724 725 /* backward solve the upper triangular */ 726 for (i=n-1; i>=0; i--){ 727 v = aa + a->diag[i] + 1; 728 vi = aj + a->diag[i] + 1; 729 nz = ai[i+1] - a->diag[i] - 1; 730 sum = tmp[i]; 731 while (nz--) sum -= *v++ * tmp[*vi++ ]; 732 tmp[i] = sum*aa[a->diag[i]]; 733 x[*c--] += tmp[i]; 734 } 735 736 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 737 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 738 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 739 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 740 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 741 742 PetscFunctionReturn(0); 743 } 744 /* -------------------------------------------------------------------*/ 745 #undef __FUNCT__ 746 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 747 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 748 { 749 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 750 IS iscol = a->col,isrow = a->row; 751 PetscErrorCode ierr; 752 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 753 PetscInt nz,*rout,*cout,*diag = a->diag; 754 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 755 756 PetscFunctionBegin; 757 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 758 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 759 tmp = a->solve_work; 760 761 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 762 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 763 764 /* copy the b into temp work space according to permutation */ 765 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 766 767 /* forward solve the U^T */ 768 for (i=0; i<n; i++) { 769 v = aa + diag[i] ; 770 vi = aj + diag[i] + 1; 771 nz = ai[i+1] - diag[i] - 1; 772 s1 = tmp[i]; 773 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 774 while (nz--) { 775 tmp[*vi++ ] -= (*v++)*s1; 776 } 777 tmp[i] = s1; 778 } 779 780 /* backward solve the L^T */ 781 for (i=n-1; i>=0; i--){ 782 v = aa + diag[i] - 1 ; 783 vi = aj + diag[i] - 1 ; 784 nz = diag[i] - ai[i]; 785 s1 = tmp[i]; 786 while (nz--) { 787 tmp[*vi-- ] -= (*v--)*s1; 788 } 789 } 790 791 /* copy tmp into x according to permutation */ 792 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 793 794 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 795 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 796 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 797 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 798 799 ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr); 800 PetscFunctionReturn(0); 801 } 802 803 #undef __FUNCT__ 804 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 805 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 806 { 807 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 808 IS iscol = a->col,isrow = a->row; 809 PetscErrorCode ierr; 810 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 811 PetscInt nz,*rout,*cout,*diag = a->diag; 812 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 813 814 PetscFunctionBegin; 815 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 816 817 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 818 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 819 tmp = a->solve_work; 820 821 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 822 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 823 824 /* copy the b into temp work space according to permutation */ 825 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 826 827 /* forward solve the U^T */ 828 for (i=0; i<n; i++) { 829 v = aa + diag[i] ; 830 vi = aj + diag[i] + 1; 831 nz = ai[i+1] - diag[i] - 1; 832 tmp[i] *= *v++; 833 while (nz--) { 834 tmp[*vi++ ] -= (*v++)*tmp[i]; 835 } 836 } 837 838 /* backward solve the L^T */ 839 for (i=n-1; i>=0; i--){ 840 v = aa + diag[i] - 1 ; 841 vi = aj + diag[i] - 1 ; 842 nz = diag[i] - ai[i]; 843 while (nz--) { 844 tmp[*vi-- ] -= (*v--)*tmp[i]; 845 } 846 } 847 848 /* copy tmp into x according to permutation */ 849 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 850 851 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 852 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 853 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 854 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 855 856 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 857 PetscFunctionReturn(0); 858 } 859 /* ----------------------------------------------------------------*/ 860 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 861 862 #undef __FUNCT__ 863 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 864 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 865 { 866 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 867 IS isicol; 868 PetscErrorCode ierr; 869 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 870 PetscInt *bi,*cols,nnz,*cols_lvl; 871 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 872 PetscInt i,levels,diagonal_fill; 873 PetscTruth col_identity,row_identity; 874 PetscReal f; 875 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 876 PetscBT lnkbt; 877 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 878 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 879 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 880 881 PetscFunctionBegin; 882 f = info->fill; 883 levels = (PetscInt)info->levels; 884 diagonal_fill = (PetscInt)info->diagonal_fill; 885 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 886 887 /* special case that simply copies fill pattern */ 888 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 889 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 890 if (!levels && row_identity && col_identity) { 891 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 892 (*fact)->factor = FACTOR_LU; 893 b = (Mat_SeqAIJ*)(*fact)->data; 894 if (!b->diag) { 895 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 896 } 897 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 898 b->row = isrow; 899 b->col = iscol; 900 b->icol = isicol; 901 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 902 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 903 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 904 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 905 PetscFunctionReturn(0); 906 } 907 908 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 909 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 910 911 /* get new row pointers */ 912 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 913 bi[0] = 0; 914 /* bdiag is location of diagonal in factor */ 915 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 916 bdiag[0] = 0; 917 918 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 919 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 920 921 /* create a linked list for storing column indices of the active row */ 922 nlnk = n + 1; 923 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 924 925 /* initial FreeSpace size is f*(ai[n]+1) */ 926 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 927 current_space = free_space; 928 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 929 current_space_lvl = free_space_lvl; 930 931 for (i=0; i<n; i++) { 932 nzi = 0; 933 /* copy current row into linked list */ 934 nnz = ai[r[i]+1] - ai[r[i]]; 935 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 936 cols = aj + ai[r[i]]; 937 lnk[i] = -1; /* marker to indicate if diagonal exists */ 938 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 939 nzi += nlnk; 940 941 /* make sure diagonal entry is included */ 942 if (diagonal_fill && lnk[i] == -1) { 943 fm = n; 944 while (lnk[fm] < i) fm = lnk[fm]; 945 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 946 lnk[fm] = i; 947 lnk_lvl[i] = 0; 948 nzi++; dcount++; 949 } 950 951 /* add pivot rows into the active row */ 952 nzbd = 0; 953 prow = lnk[n]; 954 while (prow < i) { 955 nnz = bdiag[prow]; 956 cols = bj_ptr[prow] + nnz + 1; 957 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 958 nnz = bi[prow+1] - bi[prow] - nnz - 1; 959 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 960 nzi += nlnk; 961 prow = lnk[prow]; 962 nzbd++; 963 } 964 bdiag[i] = nzbd; 965 bi[i+1] = bi[i] + nzi; 966 967 /* if free space is not available, make more free space */ 968 if (current_space->local_remaining<nzi) { 969 nnz = nzi*(n - i); /* estimated and max additional space needed */ 970 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 971 ierr = GetMoreSpace(nnz,¤t_space_lvl);CHKERRQ(ierr); 972 reallocs++; 973 } 974 975 /* copy data into free_space and free_space_lvl, then initialize lnk */ 976 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 977 bj_ptr[i] = current_space->array; 978 bjlvl_ptr[i] = current_space_lvl->array; 979 980 /* make sure the active row i has diagonal entry */ 981 if (*(bj_ptr[i]+bdiag[i]) != i) { 982 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 983 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 984 } 985 986 current_space->array += nzi; 987 current_space->local_used += nzi; 988 current_space->local_remaining -= nzi; 989 current_space_lvl->array += nzi; 990 current_space_lvl->local_used += nzi; 991 current_space_lvl->local_remaining -= nzi; 992 } 993 994 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 995 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 996 997 /* destroy list of free space and other temporary arrays */ 998 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 999 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1000 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1001 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1002 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1003 1004 { 1005 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1006 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1007 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1008 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1009 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1010 if (diagonal_fill) { 1011 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1012 } 1013 } 1014 1015 /* put together the new matrix */ 1016 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1017 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1018 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1019 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1020 b = (Mat_SeqAIJ*)(*fact)->data; 1021 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1022 b->singlemalloc = PETSC_FALSE; 1023 len = (bi[n] )*sizeof(PetscScalar); 1024 /* the next line frees the default space generated by the Create() */ 1025 ierr = PetscFree(b->a);CHKERRQ(ierr); 1026 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1027 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1028 b->j = bj; 1029 b->i = bi; 1030 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1031 b->diag = bdiag; 1032 b->ilen = 0; 1033 b->imax = 0; 1034 b->row = isrow; 1035 b->col = iscol; 1036 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1037 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1038 b->icol = isicol; 1039 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1040 /* In b structure: Free imax, ilen, old a, old j. 1041 Allocate bdiag, solve_work, new a, new j */ 1042 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1043 b->maxnz = b->nz = bi[n] ; 1044 (*fact)->factor = FACTOR_LU; 1045 (*fact)->info.factor_mallocs = reallocs; 1046 (*fact)->info.fill_ratio_given = f; 1047 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1048 1049 ierr = MatILUFactorSymbolic_Inode(A,info,isrow,iscol,fact);CHKERRQ(ierr); 1050 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1051 1052 PetscFunctionReturn(0); 1053 } 1054 1055 #include "src/mat/impls/sbaij/seq/sbaij.h" 1056 #undef __FUNCT__ 1057 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1058 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1059 { 1060 Mat C = *B; 1061 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1062 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1063 IS ip=b->row; 1064 PetscErrorCode ierr; 1065 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1066 PetscInt *ai=a->i,*aj=a->j; 1067 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1068 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1069 PetscReal zeropivot,rs,shiftnz; 1070 PetscTruth shiftpd; 1071 ChShift_Ctx sctx; 1072 PetscInt newshift; 1073 1074 PetscFunctionBegin; 1075 shiftnz = info->shiftnz; 1076 shiftpd = info->shiftpd; 1077 zeropivot = info->zeropivot; 1078 1079 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1080 1081 /* initialization */ 1082 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1083 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1084 jl = il + mbs; 1085 rtmp = (MatScalar*)(jl + mbs); 1086 1087 sctx.shift_amount = 0; 1088 sctx.nshift = 0; 1089 do { 1090 sctx.chshift = PETSC_FALSE; 1091 for (i=0; i<mbs; i++) { 1092 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1093 } 1094 1095 for (k = 0; k<mbs; k++){ 1096 bval = ba + bi[k]; 1097 /* initialize k-th row by the perm[k]-th row of A */ 1098 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1099 for (j = jmin; j < jmax; j++){ 1100 col = rip[aj[j]]; 1101 if (col >= k){ /* only take upper triangular entry */ 1102 rtmp[col] = aa[j]; 1103 *bval++ = 0.0; /* for in-place factorization */ 1104 } 1105 } 1106 /* shift the diagonal of the matrix */ 1107 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1108 1109 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1110 dk = rtmp[k]; 1111 i = jl[k]; /* first row to be added to k_th row */ 1112 1113 while (i < k){ 1114 nexti = jl[i]; /* next row to be added to k_th row */ 1115 1116 /* compute multiplier, update diag(k) and U(i,k) */ 1117 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1118 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1119 dk += uikdi*ba[ili]; 1120 ba[ili] = uikdi; /* -U(i,k) */ 1121 1122 /* add multiple of row i to k-th row */ 1123 jmin = ili + 1; jmax = bi[i+1]; 1124 if (jmin < jmax){ 1125 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1126 /* update il and jl for row i */ 1127 il[i] = jmin; 1128 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1129 } 1130 i = nexti; 1131 } 1132 1133 /* shift the diagonals when zero pivot is detected */ 1134 /* compute rs=sum of abs(off-diagonal) */ 1135 rs = 0.0; 1136 jmin = bi[k]+1; 1137 nz = bi[k+1] - jmin; 1138 if (nz){ 1139 bcol = bj + jmin; 1140 while (nz--){ 1141 rs += PetscAbsScalar(rtmp[*bcol]); 1142 bcol++; 1143 } 1144 } 1145 1146 sctx.rs = rs; 1147 sctx.pv = dk; 1148 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1149 if (newshift == 1){ 1150 break; /* sctx.shift_amount is updated */ 1151 } else if (newshift == -1){ 1152 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1153 } 1154 1155 /* copy data into U(k,:) */ 1156 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1157 jmin = bi[k]+1; jmax = bi[k+1]; 1158 if (jmin < jmax) { 1159 for (j=jmin; j<jmax; j++){ 1160 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1161 } 1162 /* add the k-th row into il and jl */ 1163 il[k] = jmin; 1164 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1165 } 1166 } 1167 } while (sctx.chshift); 1168 ierr = PetscFree(il);CHKERRQ(ierr); 1169 1170 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1171 C->factor = FACTOR_CHOLESKY; 1172 C->assembled = PETSC_TRUE; 1173 C->preallocated = PETSC_TRUE; 1174 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1175 if (sctx.nshift){ 1176 if (shiftnz) { 1177 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1178 } else if (shiftpd) { 1179 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1180 } 1181 } 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1187 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1188 { 1189 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1190 Mat_SeqSBAIJ *b; 1191 Mat B; 1192 PetscErrorCode ierr; 1193 PetscTruth perm_identity; 1194 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1195 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1196 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1197 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1198 PetscReal fill=info->fill,levels=info->levels; 1199 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1200 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1201 PetscBT lnkbt; 1202 1203 PetscFunctionBegin; 1204 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1205 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1206 1207 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1208 ui[0] = 0; 1209 1210 /* special case that simply copies fill pattern */ 1211 if (!levels && perm_identity) { 1212 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1213 for (i=0; i<am; i++) { 1214 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1215 } 1216 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1217 cols = uj; 1218 for (i=0; i<am; i++) { 1219 aj = a->j + a->diag[i]; 1220 ncols = ui[i+1] - ui[i]; 1221 for (j=0; j<ncols; j++) *cols++ = *aj++; 1222 } 1223 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1224 /* initialization */ 1225 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1226 1227 /* jl: linked list for storing indices of the pivot rows 1228 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1229 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1230 il = jl + am; 1231 uj_ptr = (PetscInt**)(il + am); 1232 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1233 for (i=0; i<am; i++){ 1234 jl[i] = am; il[i] = 0; 1235 } 1236 1237 /* create and initialize a linked list for storing column indices of the active row k */ 1238 nlnk = am + 1; 1239 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1240 1241 /* initial FreeSpace size is fill*(ai[am]+1) */ 1242 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1243 current_space = free_space; 1244 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1245 current_space_lvl = free_space_lvl; 1246 1247 for (k=0; k<am; k++){ /* for each active row k */ 1248 /* initialize lnk by the column indices of row rip[k] of A */ 1249 nzk = 0; 1250 ncols = ai[rip[k]+1] - ai[rip[k]]; 1251 ncols_upper = 0; 1252 for (j=0; j<ncols; j++){ 1253 i = *(aj + ai[rip[k]] + j); 1254 if (rip[i] >= k){ /* only take upper triangular entry */ 1255 ajtmp[ncols_upper] = i; 1256 ncols_upper++; 1257 } 1258 } 1259 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1260 nzk += nlnk; 1261 1262 /* update lnk by computing fill-in for each pivot row to be merged in */ 1263 prow = jl[k]; /* 1st pivot row */ 1264 1265 while (prow < k){ 1266 nextprow = jl[prow]; 1267 1268 /* merge prow into k-th row */ 1269 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1270 jmax = ui[prow+1]; 1271 ncols = jmax-jmin; 1272 i = jmin - ui[prow]; 1273 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1274 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1275 j = *(uj - 1); 1276 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1277 nzk += nlnk; 1278 1279 /* update il and jl for prow */ 1280 if (jmin < jmax){ 1281 il[prow] = jmin; 1282 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1283 } 1284 prow = nextprow; 1285 } 1286 1287 /* if free space is not available, make more free space */ 1288 if (current_space->local_remaining<nzk) { 1289 i = am - k + 1; /* num of unfactored rows */ 1290 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1291 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1292 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1293 reallocs++; 1294 } 1295 1296 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1297 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1298 1299 /* add the k-th row into il and jl */ 1300 if (nzk > 1){ 1301 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1302 jl[k] = jl[i]; jl[i] = k; 1303 il[k] = ui[k] + 1; 1304 } 1305 uj_ptr[k] = current_space->array; 1306 uj_lvl_ptr[k] = current_space_lvl->array; 1307 1308 current_space->array += nzk; 1309 current_space->local_used += nzk; 1310 current_space->local_remaining -= nzk; 1311 1312 current_space_lvl->array += nzk; 1313 current_space_lvl->local_used += nzk; 1314 current_space_lvl->local_remaining -= nzk; 1315 1316 ui[k+1] = ui[k] + nzk; 1317 } 1318 1319 if (ai[am] != 0) { 1320 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1321 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1322 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1323 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1324 } else { 1325 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1326 } 1327 1328 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1329 ierr = PetscFree(jl);CHKERRQ(ierr); 1330 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1331 1332 /* destroy list of free space and other temporary array(s) */ 1333 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1334 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1335 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1336 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1337 1338 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1339 1340 /* put together the new matrix in MATSEQSBAIJ format */ 1341 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1342 B = *fact; 1343 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1344 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1345 1346 b = (Mat_SeqSBAIJ*)B->data; 1347 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1348 b->singlemalloc = PETSC_FALSE; 1349 /* the next line frees the default space generated by the Create() */ 1350 ierr = PetscFree(b->a);CHKERRQ(ierr); 1351 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1352 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1353 b->j = uj; 1354 b->i = ui; 1355 b->diag = 0; 1356 b->ilen = 0; 1357 b->imax = 0; 1358 b->row = perm; 1359 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1360 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1361 b->icol = perm; 1362 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1363 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1364 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1365 b->maxnz = b->nz = ui[am]; 1366 1367 B->factor = FACTOR_CHOLESKY; 1368 B->info.factor_mallocs = reallocs; 1369 B->info.fill_ratio_given = fill; 1370 if (ai[am] != 0) { 1371 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1372 } else { 1373 B->info.fill_ratio_needed = 0.0; 1374 } 1375 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1376 if (perm_identity){ 1377 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1378 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1379 } 1380 PetscFunctionReturn(0); 1381 } 1382 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1385 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1386 { 1387 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1388 Mat_SeqSBAIJ *b; 1389 Mat B; 1390 PetscErrorCode ierr; 1391 PetscTruth perm_identity; 1392 PetscReal fill = info->fill; 1393 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1394 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1395 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1396 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1397 PetscBT lnkbt; 1398 IS iperm; 1399 1400 PetscFunctionBegin; 1401 /* check whether perm is the identity mapping */ 1402 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1403 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1404 1405 if (!perm_identity){ 1406 /* check if perm is symmetric! */ 1407 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1408 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1409 for (i=0; i<am; i++) { 1410 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1411 } 1412 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1413 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1414 } 1415 1416 /* initialization */ 1417 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1418 ui[0] = 0; 1419 1420 /* jl: linked list for storing indices of the pivot rows 1421 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1422 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1423 il = jl + am; 1424 cols = il + am; 1425 ui_ptr = (PetscInt**)(cols + am); 1426 for (i=0; i<am; i++){ 1427 jl[i] = am; il[i] = 0; 1428 } 1429 1430 /* create and initialize a linked list for storing column indices of the active row k */ 1431 nlnk = am + 1; 1432 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1433 1434 /* initial FreeSpace size is fill*(ai[am]+1) */ 1435 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1436 current_space = free_space; 1437 1438 for (k=0; k<am; k++){ /* for each active row k */ 1439 /* initialize lnk by the column indices of row rip[k] of A */ 1440 nzk = 0; 1441 ncols = ai[rip[k]+1] - ai[rip[k]]; 1442 ncols_upper = 0; 1443 for (j=0; j<ncols; j++){ 1444 i = rip[*(aj + ai[rip[k]] + j)]; 1445 if (i >= k){ /* only take upper triangular entry */ 1446 cols[ncols_upper] = i; 1447 ncols_upper++; 1448 } 1449 } 1450 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1451 nzk += nlnk; 1452 1453 /* update lnk by computing fill-in for each pivot row to be merged in */ 1454 prow = jl[k]; /* 1st pivot row */ 1455 1456 while (prow < k){ 1457 nextprow = jl[prow]; 1458 /* merge prow into k-th row */ 1459 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1460 jmax = ui[prow+1]; 1461 ncols = jmax-jmin; 1462 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1463 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1464 nzk += nlnk; 1465 1466 /* update il and jl for prow */ 1467 if (jmin < jmax){ 1468 il[prow] = jmin; 1469 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1470 } 1471 prow = nextprow; 1472 } 1473 1474 /* if free space is not available, make more free space */ 1475 if (current_space->local_remaining<nzk) { 1476 i = am - k + 1; /* num of unfactored rows */ 1477 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1478 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1479 reallocs++; 1480 } 1481 1482 /* copy data into free space, then initialize lnk */ 1483 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1484 1485 /* add the k-th row into il and jl */ 1486 if (nzk-1 > 0){ 1487 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1488 jl[k] = jl[i]; jl[i] = k; 1489 il[k] = ui[k] + 1; 1490 } 1491 ui_ptr[k] = current_space->array; 1492 current_space->array += nzk; 1493 current_space->local_used += nzk; 1494 current_space->local_remaining -= nzk; 1495 1496 ui[k+1] = ui[k] + nzk; 1497 } 1498 1499 if (ai[am] != 0) { 1500 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1501 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1502 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1503 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1504 } else { 1505 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1506 } 1507 1508 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1509 ierr = PetscFree(jl);CHKERRQ(ierr); 1510 1511 /* destroy list of free space and other temporary array(s) */ 1512 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1513 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1514 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1515 1516 /* put together the new matrix in MATSEQSBAIJ format */ 1517 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1518 B = *fact; 1519 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1520 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1521 1522 b = (Mat_SeqSBAIJ*)B->data; 1523 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1524 b->singlemalloc = PETSC_FALSE; 1525 /* the next line frees the default space generated by the Create() */ 1526 ierr = PetscFree(b->a);CHKERRQ(ierr); 1527 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1528 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1529 b->j = uj; 1530 b->i = ui; 1531 b->diag = 0; 1532 b->ilen = 0; 1533 b->imax = 0; 1534 b->row = perm; 1535 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1536 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1537 b->icol = perm; 1538 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1539 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1540 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1541 b->maxnz = b->nz = ui[am]; 1542 1543 B->factor = FACTOR_CHOLESKY; 1544 B->info.factor_mallocs = reallocs; 1545 B->info.fill_ratio_given = fill; 1546 if (ai[am] != 0) { 1547 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1548 } else { 1549 B->info.fill_ratio_needed = 0.0; 1550 } 1551 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1552 if (perm_identity){ 1553 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1554 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1555 } 1556 PetscFunctionReturn(0); 1557 } 1558