1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 #include "petscbt.h" 6 #include "src/mat/utils/freespace.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 10 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 11 { 12 PetscFunctionBegin; 13 14 SETERRQ(PETSC_ERR_SUP,"Code not written"); 15 #if !defined(PETSC_USE_DEBUG) 16 PetscFunctionReturn(0); 17 #endif 18 } 19 20 21 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 22 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 23 24 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 25 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 26 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->m; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 223 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 225 (*fact)->factor = FACTOR_LU; 226 (*fact)->assembled = PETSC_TRUE; 227 228 b = (Mat_SeqAIJ*)(*fact)->data; 229 ierr = PetscFree(b->imax);CHKERRQ(ierr); 230 b->sorted = PETSC_FALSE; 231 b->singlemalloc = PETSC_FALSE; 232 /* the next line frees the default space generated by the MatCreate() */ 233 ierr = PetscFree(b->a);CHKERRQ(ierr); 234 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 235 b->a = new_a; 236 b->j = new_j; 237 b->i = new_i; 238 b->ilen = 0; 239 b->imax = 0; 240 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 241 b->row = isirow; 242 b->col = iscolf; 243 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 244 b->maxnz = b->nz = new_i[n]; 245 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 246 (*fact)->info.factor_mallocs = 0; 247 248 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 249 250 af = ((double)b->nz)/((double)a->nz) + .001; 251 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 255 256 /* Call parent MatILUDTFactor here */ 257 258 PetscFunctionReturn(0); 259 #endif 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 265 { 266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 267 IS isicol; 268 PetscErrorCode ierr; 269 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 270 PetscInt *bi,*bj,*ajtmp; 271 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 272 PetscReal f; 273 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 274 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 275 PetscBT lnkbt; 276 277 PetscFunctionBegin; 278 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 279 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 280 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 281 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 282 283 /* get new row pointers */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 285 bi[0] = 0; 286 287 /* bdiag is location of diagonal in factor */ 288 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 289 bdiag[0] = 0; 290 291 /* linked list for storing column indices of the active row */ 292 nlnk = n + 1; 293 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 294 295 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 296 im = cols + n; 297 bi_ptr = (PetscInt**)(im + n); 298 299 /* initial FreeSpace size is f*(ai[n]+1) */ 300 f = info->fill; 301 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 302 current_space = free_space; 303 304 for (i=0; i<n; i++) { 305 /* copy previous fill into linked list */ 306 nzi = 0; 307 nnz = ai[r[i]+1] - ai[r[i]]; 308 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 309 ajtmp = aj + ai[r[i]]; 310 for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */ 311 ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 312 nzi += nlnk; 313 314 /* add pivot rows into linked list */ 315 row = lnk[n]; 316 while (row < i) { 317 nzbd = bdiag[row] - bi[row] + 1; 318 ajtmp = bi_ptr[row] + nzbd; 319 nnz = im[row] - nzbd; /* num of columns with row<indices<=i */ 320 im[row] = nzbd; 321 ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr); 322 nzi += nlnk; 323 im[row] += nzbd; /* update im[row]: num of cols with index<=i */ 324 325 row = lnk[row]; 326 } 327 328 bi[i+1] = bi[i] + nzi; 329 im[i] = nzi; 330 331 /* mark bdiag */ 332 nzbd = 0; 333 nnz = nzi; 334 k = lnk[n]; 335 while (nnz-- && k < i){ 336 nzbd++; 337 k = lnk[k]; 338 } 339 bdiag[i] = bi[i] + nzbd; 340 341 /* if free space is not available, make more free space */ 342 if (current_space->local_remaining<nzi) { 343 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 344 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 345 reallocs++; 346 } 347 348 /* copy data into free space, then initialize lnk */ 349 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 350 bi_ptr[i] = current_space->array; 351 current_space->array += nzi; 352 current_space->local_used += nzi; 353 current_space->local_remaining -= nzi; 354 } 355 if (ai[n] != 0) { 356 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 357 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 358 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 359 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 360 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 361 } else { 362 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 363 } 364 365 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 366 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 367 368 /* destroy list of free space and other temporary array(s) */ 369 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 370 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 371 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 372 ierr = PetscFree(cols);CHKERRQ(ierr); 373 374 /* put together the new matrix */ 375 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 376 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 377 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 378 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 379 b = (Mat_SeqAIJ*)(*B)->data; 380 ierr = PetscFree(b->imax);CHKERRQ(ierr); 381 b->singlemalloc = PETSC_FALSE; 382 /* the next line frees the default space generated by the Create() */ 383 ierr = PetscFree(b->a);CHKERRQ(ierr); 384 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 385 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 386 b->j = bj; 387 b->i = bi; 388 b->diag = bdiag; 389 b->ilen = 0; 390 b->imax = 0; 391 b->row = isrow; 392 b->col = iscol; 393 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 394 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 395 b->icol = isicol; 396 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 397 398 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 399 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 400 b->maxnz = b->nz = bi[n] ; 401 402 (*B)->factor = FACTOR_LU; 403 (*B)->info.factor_mallocs = reallocs; 404 (*B)->info.fill_ratio_given = f; 405 406 if (ai[n] != 0) { 407 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 408 } else { 409 (*B)->info.fill_ratio_needed = 0.0; 410 } 411 /* Call parent MatLUFactorSymbolic here */ 412 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 413 PetscFunctionReturn(0); 414 } 415 416 /* ----------------------------------------------------------- */ 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 419 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 420 { 421 Mat C=*B; 422 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 423 IS isrow = b->row,isicol = b->icol; 424 PetscErrorCode ierr; 425 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 426 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 427 PetscInt *diag_offset = b->diag,diag,*pj; 428 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 429 PetscReal zeropivot,rs,d,shift_nz; 430 PetscReal row_shift,shift_top=0.; 431 PetscTruth shift_pd; 432 LUShift_Ctx sctx; 433 PetscInt newshift; 434 435 PetscFunctionBegin; 436 shift_nz = info->shiftnz; 437 shift_pd = info->shiftpd; 438 zeropivot = info->zeropivot; 439 440 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 441 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 442 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 443 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 444 rtmps = rtmp; ics = ic; 445 446 if (!a->diag) { 447 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 448 } 449 /* if both shift schemes are chosen by user, only use shift_pd */ 450 if (shift_pd && shift_nz) shift_nz = 0.0; 451 if (shift_pd) { /* set shift_top=max{row_shift} */ 452 PetscInt *aai = a->i,*ddiag = a->diag; 453 shift_top = 0; 454 for (i=0; i<n; i++) { 455 d = PetscAbsScalar((a->a)[ddiag[i]]); 456 /* calculate amt of shift needed for this row */ 457 if (d<=0) { 458 row_shift = 0; 459 } else { 460 row_shift = -2*d; 461 } 462 v = a->a+aai[i]; 463 nz = aai[i+1] - aai[i]; 464 for (j=0; j<nz; j++) 465 row_shift += PetscAbsScalar(v[j]); 466 if (row_shift>shift_top) shift_top = row_shift; 467 } 468 } 469 470 sctx.shift_amount = 0; 471 sctx.shift_top = shift_top; 472 sctx.nshift = 0; 473 sctx.nshift_max = 5; 474 sctx.shift_lo = 0.; 475 sctx.shift_hi = 1.; 476 do { 477 sctx.lushift = PETSC_FALSE; 478 for (i=0; i<n; i++){ 479 nz = bi[i+1] - bi[i]; 480 bjtmp = bj + bi[i]; 481 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 482 483 /* load in initial (unfactored row) */ 484 nz = a->i[r[i]+1] - a->i[r[i]]; 485 ajtmp = a->j + a->i[r[i]]; 486 v = a->a + a->i[r[i]]; 487 for (j=0; j<nz; j++) { 488 rtmp[ics[ajtmp[j]]] = v[j]; 489 } 490 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 491 492 row = *bjtmp++; 493 while (row < i) { 494 pc = rtmp + row; 495 if (*pc != 0.0) { 496 pv = b->a + diag_offset[row]; 497 pj = b->j + diag_offset[row] + 1; 498 multiplier = *pc / *pv++; 499 *pc = multiplier; 500 nz = bi[row+1] - diag_offset[row] - 1; 501 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 502 PetscLogFlops(2*nz); 503 } 504 row = *bjtmp++; 505 } 506 /* finished row so stick it into b->a */ 507 pv = b->a + bi[i] ; 508 pj = b->j + bi[i] ; 509 nz = bi[i+1] - bi[i]; 510 diag = diag_offset[i] - bi[i]; 511 rs = 0.0; 512 for (j=0; j<nz; j++) { 513 pv[j] = rtmps[pj[j]]; 514 if (j != diag) rs += PetscAbsScalar(pv[j]); 515 } 516 517 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 518 sctx.rs = rs; 519 sctx.pv = pv[diag]; 520 ierr = Mat_LUCheckShift(info,&sctx,&newshift);CHKERRQ(ierr); 521 if (newshift == 1){ 522 break; /* sctx.shift_amount is updated */ 523 } else if (newshift == -1){ 524 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 525 } 526 } 527 528 if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 529 /* 530 * if no shift in this attempt & shifting & started shifting & can refine, 531 * then try lower shift 532 */ 533 sctx.shift_hi = info->shift_fraction; 534 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 535 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 536 sctx.lushift = PETSC_TRUE; 537 sctx.nshift++; 538 } 539 } while (sctx.lushift); 540 541 /* invert diagonal entries for simplier triangular solves */ 542 for (i=0; i<n; i++) { 543 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 544 } 545 546 ierr = PetscFree(rtmp);CHKERRQ(ierr); 547 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 548 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 549 C->factor = FACTOR_LU; 550 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 551 C->assembled = PETSC_TRUE; 552 PetscLogFlops(C->n); 553 if (sctx.nshift){ 554 if (shift_nz) { 555 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 556 } else if (shift_pd) { 557 b->lu_shift_fraction = info->shift_fraction; 558 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",info->shift_fraction,shift_top,sctx.nshift); 559 } 560 } 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 566 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 567 { 568 PetscFunctionBegin; 569 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 570 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 571 PetscFunctionReturn(0); 572 } 573 574 575 /* ----------------------------------------------------------- */ 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatLUFactor_SeqAIJ" 578 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 579 { 580 PetscErrorCode ierr; 581 Mat C; 582 583 PetscFunctionBegin; 584 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 585 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 586 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 587 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 588 PetscFunctionReturn(0); 589 } 590 /* ----------------------------------------------------------- */ 591 #undef __FUNCT__ 592 #define __FUNCT__ "MatSolve_SeqAIJ" 593 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 594 { 595 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 596 IS iscol = a->col,isrow = a->row; 597 PetscErrorCode ierr; 598 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 599 PetscInt nz,*rout,*cout; 600 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 601 602 PetscFunctionBegin; 603 if (!n) PetscFunctionReturn(0); 604 605 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 606 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 607 tmp = a->solve_work; 608 609 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 610 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 611 612 /* forward solve the lower triangular */ 613 tmp[0] = b[*r++]; 614 tmps = tmp; 615 for (i=1; i<n; i++) { 616 v = aa + ai[i] ; 617 vi = aj + ai[i] ; 618 nz = a->diag[i] - ai[i]; 619 sum = b[*r++]; 620 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 621 tmp[i] = sum; 622 } 623 624 /* backward solve the upper triangular */ 625 for (i=n-1; i>=0; i--){ 626 v = aa + a->diag[i] + 1; 627 vi = aj + a->diag[i] + 1; 628 nz = ai[i+1] - a->diag[i] - 1; 629 sum = tmp[i]; 630 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 631 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 632 } 633 634 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 635 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 636 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 637 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 638 PetscLogFlops(2*a->nz - A->n); 639 PetscFunctionReturn(0); 640 } 641 642 /* ----------------------------------------------------------- */ 643 #undef __FUNCT__ 644 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 645 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 646 { 647 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 648 PetscErrorCode ierr; 649 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 650 PetscScalar *x,*b,*aa = a->a; 651 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 652 PetscInt adiag_i,i,*vi,nz,ai_i; 653 PetscScalar *v,sum; 654 #endif 655 656 PetscFunctionBegin; 657 if (!n) PetscFunctionReturn(0); 658 659 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 660 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 661 662 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 663 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 664 #else 665 /* forward solve the lower triangular */ 666 x[0] = b[0]; 667 for (i=1; i<n; i++) { 668 ai_i = ai[i]; 669 v = aa + ai_i; 670 vi = aj + ai_i; 671 nz = adiag[i] - ai_i; 672 sum = b[i]; 673 while (nz--) sum -= *v++ * x[*vi++]; 674 x[i] = sum; 675 } 676 677 /* backward solve the upper triangular */ 678 for (i=n-1; i>=0; i--){ 679 adiag_i = adiag[i]; 680 v = aa + adiag_i + 1; 681 vi = aj + adiag_i + 1; 682 nz = ai[i+1] - adiag_i - 1; 683 sum = x[i]; 684 while (nz--) sum -= *v++ * x[*vi++]; 685 x[i] = sum*aa[adiag_i]; 686 } 687 #endif 688 PetscLogFlops(2*a->nz - A->n); 689 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 690 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 696 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 697 { 698 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 699 IS iscol = a->col,isrow = a->row; 700 PetscErrorCode ierr; 701 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 702 PetscInt nz,*rout,*cout; 703 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 704 705 PetscFunctionBegin; 706 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 707 708 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 709 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 710 tmp = a->solve_work; 711 712 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 713 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 714 715 /* forward solve the lower triangular */ 716 tmp[0] = b[*r++]; 717 for (i=1; i<n; i++) { 718 v = aa + ai[i] ; 719 vi = aj + ai[i] ; 720 nz = a->diag[i] - ai[i]; 721 sum = b[*r++]; 722 while (nz--) sum -= *v++ * tmp[*vi++ ]; 723 tmp[i] = sum; 724 } 725 726 /* backward solve the upper triangular */ 727 for (i=n-1; i>=0; i--){ 728 v = aa + a->diag[i] + 1; 729 vi = aj + a->diag[i] + 1; 730 nz = ai[i+1] - a->diag[i] - 1; 731 sum = tmp[i]; 732 while (nz--) sum -= *v++ * tmp[*vi++ ]; 733 tmp[i] = sum*aa[a->diag[i]]; 734 x[*c--] += tmp[i]; 735 } 736 737 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 738 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 739 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 740 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 741 PetscLogFlops(2*a->nz); 742 743 PetscFunctionReturn(0); 744 } 745 /* -------------------------------------------------------------------*/ 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 748 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 749 { 750 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 751 IS iscol = a->col,isrow = a->row; 752 PetscErrorCode ierr; 753 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 754 PetscInt nz,*rout,*cout,*diag = a->diag; 755 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 756 757 PetscFunctionBegin; 758 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 759 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 760 tmp = a->solve_work; 761 762 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 763 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 764 765 /* copy the b into temp work space according to permutation */ 766 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 767 768 /* forward solve the U^T */ 769 for (i=0; i<n; i++) { 770 v = aa + diag[i] ; 771 vi = aj + diag[i] + 1; 772 nz = ai[i+1] - diag[i] - 1; 773 s1 = tmp[i]; 774 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 775 while (nz--) { 776 tmp[*vi++ ] -= (*v++)*s1; 777 } 778 tmp[i] = s1; 779 } 780 781 /* backward solve the L^T */ 782 for (i=n-1; i>=0; i--){ 783 v = aa + diag[i] - 1 ; 784 vi = aj + diag[i] - 1 ; 785 nz = diag[i] - ai[i]; 786 s1 = tmp[i]; 787 while (nz--) { 788 tmp[*vi-- ] -= (*v--)*s1; 789 } 790 } 791 792 /* copy tmp into x according to permutation */ 793 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 794 795 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 796 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 797 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 798 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 799 800 PetscLogFlops(2*a->nz-A->n); 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 806 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 807 { 808 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 809 IS iscol = a->col,isrow = a->row; 810 PetscErrorCode ierr; 811 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 812 PetscInt nz,*rout,*cout,*diag = a->diag; 813 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 814 815 PetscFunctionBegin; 816 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 817 818 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 819 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 820 tmp = a->solve_work; 821 822 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 823 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 824 825 /* copy the b into temp work space according to permutation */ 826 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 827 828 /* forward solve the U^T */ 829 for (i=0; i<n; i++) { 830 v = aa + diag[i] ; 831 vi = aj + diag[i] + 1; 832 nz = ai[i+1] - diag[i] - 1; 833 tmp[i] *= *v++; 834 while (nz--) { 835 tmp[*vi++ ] -= (*v++)*tmp[i]; 836 } 837 } 838 839 /* backward solve the L^T */ 840 for (i=n-1; i>=0; i--){ 841 v = aa + diag[i] - 1 ; 842 vi = aj + diag[i] - 1 ; 843 nz = diag[i] - ai[i]; 844 while (nz--) { 845 tmp[*vi-- ] -= (*v--)*tmp[i]; 846 } 847 } 848 849 /* copy tmp into x according to permutation */ 850 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 851 852 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 853 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 854 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 855 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 856 857 PetscLogFlops(2*a->nz); 858 PetscFunctionReturn(0); 859 } 860 /* ----------------------------------------------------------------*/ 861 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 862 863 #undef __FUNCT__ 864 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 865 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 866 { 867 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 868 IS isicol; 869 PetscErrorCode ierr; 870 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 871 PetscInt *bi,*cols,nnz,*cols_lvl; 872 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 873 PetscInt i,levels,diagonal_fill; 874 PetscTruth col_identity,row_identity; 875 PetscReal f; 876 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 877 PetscBT lnkbt; 878 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 879 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 880 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 881 882 PetscFunctionBegin; 883 f = info->fill; 884 levels = (PetscInt)info->levels; 885 diagonal_fill = (PetscInt)info->diagonal_fill; 886 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 887 888 /* special case that simply copies fill pattern */ 889 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 890 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 891 if (!levels && row_identity && col_identity) { 892 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 893 (*fact)->factor = FACTOR_LU; 894 b = (Mat_SeqAIJ*)(*fact)->data; 895 if (!b->diag) { 896 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 897 } 898 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 899 b->row = isrow; 900 b->col = iscol; 901 b->icol = isicol; 902 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 903 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 904 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 905 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 906 PetscFunctionReturn(0); 907 } 908 909 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 910 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 911 912 /* get new row pointers */ 913 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 914 bi[0] = 0; 915 /* bdiag is location of diagonal in factor */ 916 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 917 bdiag[0] = 0; 918 919 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 920 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 921 922 /* create a linked list for storing column indices of the active row */ 923 nlnk = n + 1; 924 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 925 926 /* initial FreeSpace size is f*(ai[n]+1) */ 927 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 928 current_space = free_space; 929 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 930 current_space_lvl = free_space_lvl; 931 932 for (i=0; i<n; i++) { 933 nzi = 0; 934 /* copy current row into linked list */ 935 nnz = ai[r[i]+1] - ai[r[i]]; 936 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 937 cols = aj + ai[r[i]]; 938 lnk[i] = -1; /* marker to indicate if diagonal exists */ 939 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 940 nzi += nlnk; 941 942 /* make sure diagonal entry is included */ 943 if (diagonal_fill && lnk[i] == -1) { 944 fm = n; 945 while (lnk[fm] < i) fm = lnk[fm]; 946 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 947 lnk[fm] = i; 948 lnk_lvl[i] = 0; 949 nzi++; dcount++; 950 } 951 952 /* add pivot rows into the active row */ 953 nzbd = 0; 954 prow = lnk[n]; 955 while (prow < i) { 956 nnz = bdiag[prow]; 957 cols = bj_ptr[prow] + nnz + 1; 958 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 959 nnz = bi[prow+1] - bi[prow] - nnz - 1; 960 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 961 nzi += nlnk; 962 prow = lnk[prow]; 963 nzbd++; 964 } 965 bdiag[i] = nzbd; 966 bi[i+1] = bi[i] + nzi; 967 968 /* if free space is not available, make more free space */ 969 if (current_space->local_remaining<nzi) { 970 nnz = nzi*(n - i); /* estimated and max additional space needed */ 971 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 972 ierr = GetMoreSpace(nnz,¤t_space_lvl);CHKERRQ(ierr); 973 reallocs++; 974 } 975 976 /* copy data into free_space and free_space_lvl, then initialize lnk */ 977 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 978 bj_ptr[i] = current_space->array; 979 bjlvl_ptr[i] = current_space_lvl->array; 980 981 /* make sure the active row i has diagonal entry */ 982 if (*(bj_ptr[i]+bdiag[i]) != i) { 983 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 984 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 985 } 986 987 current_space->array += nzi; 988 current_space->local_used += nzi; 989 current_space->local_remaining -= nzi; 990 current_space_lvl->array += nzi; 991 current_space_lvl->local_used += nzi; 992 current_space_lvl->local_remaining -= nzi; 993 } 994 995 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 996 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 997 998 /* destroy list of free space and other temporary arrays */ 999 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1000 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1001 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1002 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1003 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1004 1005 { 1006 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1007 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1008 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1009 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1010 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1011 if (diagonal_fill) { 1012 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1013 } 1014 } 1015 1016 /* put together the new matrix */ 1017 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1018 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1019 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1020 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1021 b = (Mat_SeqAIJ*)(*fact)->data; 1022 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1023 b->singlemalloc = PETSC_FALSE; 1024 len = (bi[n] )*sizeof(PetscScalar); 1025 /* the next line frees the default space generated by the Create() */ 1026 ierr = PetscFree(b->a);CHKERRQ(ierr); 1027 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1028 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1029 b->j = bj; 1030 b->i = bi; 1031 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1032 b->diag = bdiag; 1033 b->ilen = 0; 1034 b->imax = 0; 1035 b->row = isrow; 1036 b->col = iscol; 1037 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1038 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1039 b->icol = isicol; 1040 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1041 /* In b structure: Free imax, ilen, old a, old j. 1042 Allocate bdiag, solve_work, new a, new j */ 1043 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1044 b->maxnz = b->nz = bi[n] ; 1045 (*fact)->factor = FACTOR_LU; 1046 (*fact)->info.factor_mallocs = reallocs; 1047 (*fact)->info.fill_ratio_given = f; 1048 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1049 1050 /* Call parent MatILUFactorSymbolic here */ 1051 1052 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1053 PetscFunctionReturn(0); 1054 } 1055 1056 #include "src/mat/impls/sbaij/seq/sbaij.h" 1057 #undef __FUNCT__ 1058 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1059 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1060 { 1061 Mat C = *B; 1062 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1063 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1064 IS ip=b->row; 1065 PetscErrorCode ierr; 1066 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1067 PetscInt *ai=a->i,*aj=a->j; 1068 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1069 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1070 PetscReal zeropivot,rs,shiftnz; 1071 PetscTruth shiftpd; 1072 ChShift_Ctx sctx; 1073 PetscInt newshift; 1074 1075 PetscFunctionBegin; 1076 shiftnz = info->shiftnz; 1077 shiftpd = info->shiftpd; 1078 zeropivot = info->zeropivot; 1079 1080 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1081 1082 /* initialization */ 1083 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1084 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1085 jl = il + mbs; 1086 rtmp = (MatScalar*)(jl + mbs); 1087 1088 sctx.shift_amount = 0; 1089 sctx.nshift = 0; 1090 do { 1091 sctx.chshift = PETSC_FALSE; 1092 for (i=0; i<mbs; i++) { 1093 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1094 } 1095 1096 for (k = 0; k<mbs; k++){ 1097 bval = ba + bi[k]; 1098 /* initialize k-th row by the perm[k]-th row of A */ 1099 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1100 for (j = jmin; j < jmax; j++){ 1101 col = rip[aj[j]]; 1102 if (col >= k){ /* only take upper triangular entry */ 1103 rtmp[col] = aa[j]; 1104 *bval++ = 0.0; /* for in-place factorization */ 1105 } 1106 } 1107 /* shift the diagonal of the matrix */ 1108 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1109 1110 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1111 dk = rtmp[k]; 1112 i = jl[k]; /* first row to be added to k_th row */ 1113 1114 while (i < k){ 1115 nexti = jl[i]; /* next row to be added to k_th row */ 1116 1117 /* compute multiplier, update diag(k) and U(i,k) */ 1118 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1119 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1120 dk += uikdi*ba[ili]; 1121 ba[ili] = uikdi; /* -U(i,k) */ 1122 1123 /* add multiple of row i to k-th row */ 1124 jmin = ili + 1; jmax = bi[i+1]; 1125 if (jmin < jmax){ 1126 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1127 /* update il and jl for row i */ 1128 il[i] = jmin; 1129 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1130 } 1131 i = nexti; 1132 } 1133 1134 /* shift the diagonals when zero pivot is detected */ 1135 /* compute rs=sum of abs(off-diagonal) */ 1136 rs = 0.0; 1137 jmin = bi[k]+1; 1138 nz = bi[k+1] - jmin; 1139 if (nz){ 1140 bcol = bj + jmin; 1141 while (nz--){ 1142 rs += PetscAbsScalar(rtmp[*bcol]); 1143 bcol++; 1144 } 1145 } 1146 1147 sctx.rs = rs; 1148 sctx.pv = dk; 1149 ierr = Mat_CholeskyCheckShift(info,&sctx,&newshift);CHKERRQ(ierr); 1150 if (newshift == 1){ 1151 break; /* sctx.shift_amount is updated */ 1152 } else if (newshift == -1){ 1153 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1154 } 1155 1156 /* copy data into U(k,:) */ 1157 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1158 jmin = bi[k]+1; jmax = bi[k+1]; 1159 if (jmin < jmax) { 1160 for (j=jmin; j<jmax; j++){ 1161 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1162 } 1163 /* add the k-th row into il and jl */ 1164 il[k] = jmin; 1165 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1166 } 1167 } 1168 } while (sctx.chshift); 1169 ierr = PetscFree(il);CHKERRQ(ierr); 1170 1171 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1172 C->factor = FACTOR_CHOLESKY; 1173 C->assembled = PETSC_TRUE; 1174 C->preallocated = PETSC_TRUE; 1175 PetscLogFlops(C->m); 1176 if (sctx.nshift){ 1177 if (shiftnz) { 1178 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1179 } else if (shiftpd) { 1180 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1181 } 1182 } 1183 PetscFunctionReturn(0); 1184 } 1185 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1188 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1189 { 1190 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1191 Mat_SeqSBAIJ *b; 1192 Mat B; 1193 PetscErrorCode ierr; 1194 PetscTruth perm_identity; 1195 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1196 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1197 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1198 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1199 PetscReal fill=info->fill,levels=info->levels; 1200 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1201 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1202 PetscBT lnkbt; 1203 1204 PetscFunctionBegin; 1205 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1206 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1207 1208 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1209 ui[0] = 0; 1210 1211 /* special case that simply copies fill pattern */ 1212 if (!levels && perm_identity) { 1213 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1214 for (i=0; i<am; i++) { 1215 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1216 } 1217 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1218 cols = uj; 1219 for (i=0; i<am; i++) { 1220 aj = a->j + a->diag[i]; 1221 ncols = ui[i+1] - ui[i]; 1222 for (j=0; j<ncols; j++) *cols++ = *aj++; 1223 } 1224 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1225 /* initialization */ 1226 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1227 1228 /* jl: linked list for storing indices of the pivot rows 1229 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1230 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1231 il = jl + am; 1232 uj_ptr = (PetscInt**)(il + am); 1233 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1234 for (i=0; i<am; i++){ 1235 jl[i] = am; il[i] = 0; 1236 } 1237 1238 /* create and initialize a linked list for storing column indices of the active row k */ 1239 nlnk = am + 1; 1240 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1241 1242 /* initial FreeSpace size is fill*(ai[am]+1) */ 1243 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1244 current_space = free_space; 1245 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1246 current_space_lvl = free_space_lvl; 1247 1248 for (k=0; k<am; k++){ /* for each active row k */ 1249 /* initialize lnk by the column indices of row rip[k] of A */ 1250 nzk = 0; 1251 ncols = ai[rip[k]+1] - ai[rip[k]]; 1252 ncols_upper = 0; 1253 for (j=0; j<ncols; j++){ 1254 i = *(aj + ai[rip[k]] + j); 1255 if (rip[i] >= k){ /* only take upper triangular entry */ 1256 ajtmp[ncols_upper] = i; 1257 ncols_upper++; 1258 } 1259 } 1260 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1261 nzk += nlnk; 1262 1263 /* update lnk by computing fill-in for each pivot row to be merged in */ 1264 prow = jl[k]; /* 1st pivot row */ 1265 1266 while (prow < k){ 1267 nextprow = jl[prow]; 1268 1269 /* merge prow into k-th row */ 1270 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1271 jmax = ui[prow+1]; 1272 ncols = jmax-jmin; 1273 i = jmin - ui[prow]; 1274 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1275 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1276 j = *(uj - 1); 1277 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1278 nzk += nlnk; 1279 1280 /* update il and jl for prow */ 1281 if (jmin < jmax){ 1282 il[prow] = jmin; 1283 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1284 } 1285 prow = nextprow; 1286 } 1287 1288 /* if free space is not available, make more free space */ 1289 if (current_space->local_remaining<nzk) { 1290 i = am - k + 1; /* num of unfactored rows */ 1291 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1292 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1293 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1294 reallocs++; 1295 } 1296 1297 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1298 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1299 1300 /* add the k-th row into il and jl */ 1301 if (nzk > 1){ 1302 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1303 jl[k] = jl[i]; jl[i] = k; 1304 il[k] = ui[k] + 1; 1305 } 1306 uj_ptr[k] = current_space->array; 1307 uj_lvl_ptr[k] = current_space_lvl->array; 1308 1309 current_space->array += nzk; 1310 current_space->local_used += nzk; 1311 current_space->local_remaining -= nzk; 1312 1313 current_space_lvl->array += nzk; 1314 current_space_lvl->local_used += nzk; 1315 current_space_lvl->local_remaining -= nzk; 1316 1317 ui[k+1] = ui[k] + nzk; 1318 } 1319 1320 if (ai[am] != 0) { 1321 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1322 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1323 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1324 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1325 } else { 1326 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1327 } 1328 1329 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1330 ierr = PetscFree(jl);CHKERRQ(ierr); 1331 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1332 1333 /* destroy list of free space and other temporary array(s) */ 1334 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1335 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1336 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1337 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1338 1339 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1340 1341 /* put together the new matrix in MATSEQSBAIJ format */ 1342 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1343 B = *fact; 1344 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1345 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1346 1347 b = (Mat_SeqSBAIJ*)B->data; 1348 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1349 b->singlemalloc = PETSC_FALSE; 1350 /* the next line frees the default space generated by the Create() */ 1351 ierr = PetscFree(b->a);CHKERRQ(ierr); 1352 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1353 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1354 b->j = uj; 1355 b->i = ui; 1356 b->diag = 0; 1357 b->ilen = 0; 1358 b->imax = 0; 1359 b->row = perm; 1360 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1361 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1362 b->icol = perm; 1363 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1364 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1365 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1366 b->maxnz = b->nz = ui[am]; 1367 1368 B->factor = FACTOR_CHOLESKY; 1369 B->info.factor_mallocs = reallocs; 1370 B->info.fill_ratio_given = fill; 1371 if (ai[am] != 0) { 1372 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1373 } else { 1374 B->info.fill_ratio_needed = 0.0; 1375 } 1376 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1377 if (perm_identity){ 1378 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1379 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1380 } 1381 PetscFunctionReturn(0); 1382 } 1383 1384 #undef __FUNCT__ 1385 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1386 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1387 { 1388 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1389 Mat_SeqSBAIJ *b; 1390 Mat B; 1391 PetscErrorCode ierr; 1392 PetscTruth perm_identity; 1393 PetscReal fill = info->fill; 1394 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1395 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1396 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1397 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1398 PetscBT lnkbt; 1399 IS iperm; 1400 1401 PetscFunctionBegin; 1402 /* check whether perm is the identity mapping */ 1403 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1404 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1405 1406 if (!perm_identity){ 1407 /* check if perm is symmetric! */ 1408 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1409 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1410 for (i=0; i<am; i++) { 1411 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1412 } 1413 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1414 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1415 } 1416 1417 /* initialization */ 1418 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1419 ui[0] = 0; 1420 1421 /* jl: linked list for storing indices of the pivot rows 1422 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1423 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1424 il = jl + am; 1425 cols = il + am; 1426 ui_ptr = (PetscInt**)(cols + am); 1427 for (i=0; i<am; i++){ 1428 jl[i] = am; il[i] = 0; 1429 } 1430 1431 /* create and initialize a linked list for storing column indices of the active row k */ 1432 nlnk = am + 1; 1433 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1434 1435 /* initial FreeSpace size is fill*(ai[am]+1) */ 1436 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1437 current_space = free_space; 1438 1439 for (k=0; k<am; k++){ /* for each active row k */ 1440 /* initialize lnk by the column indices of row rip[k] of A */ 1441 nzk = 0; 1442 ncols = ai[rip[k]+1] - ai[rip[k]]; 1443 ncols_upper = 0; 1444 for (j=0; j<ncols; j++){ 1445 i = rip[*(aj + ai[rip[k]] + j)]; 1446 if (i >= k){ /* only take upper triangular entry */ 1447 cols[ncols_upper] = i; 1448 ncols_upper++; 1449 } 1450 } 1451 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1452 nzk += nlnk; 1453 1454 /* update lnk by computing fill-in for each pivot row to be merged in */ 1455 prow = jl[k]; /* 1st pivot row */ 1456 1457 while (prow < k){ 1458 nextprow = jl[prow]; 1459 /* merge prow into k-th row */ 1460 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1461 jmax = ui[prow+1]; 1462 ncols = jmax-jmin; 1463 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1464 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1465 nzk += nlnk; 1466 1467 /* update il and jl for prow */ 1468 if (jmin < jmax){ 1469 il[prow] = jmin; 1470 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1471 } 1472 prow = nextprow; 1473 } 1474 1475 /* if free space is not available, make more free space */ 1476 if (current_space->local_remaining<nzk) { 1477 i = am - k + 1; /* num of unfactored rows */ 1478 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1479 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1480 reallocs++; 1481 } 1482 1483 /* copy data into free space, then initialize lnk */ 1484 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1485 1486 /* add the k-th row into il and jl */ 1487 if (nzk-1 > 0){ 1488 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1489 jl[k] = jl[i]; jl[i] = k; 1490 il[k] = ui[k] + 1; 1491 } 1492 ui_ptr[k] = current_space->array; 1493 current_space->array += nzk; 1494 current_space->local_used += nzk; 1495 current_space->local_remaining -= nzk; 1496 1497 ui[k+1] = ui[k] + nzk; 1498 } 1499 1500 if (ai[am] != 0) { 1501 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1502 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1503 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1504 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1505 } else { 1506 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1507 } 1508 1509 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1510 ierr = PetscFree(jl);CHKERRQ(ierr); 1511 1512 /* destroy list of free space and other temporary array(s) */ 1513 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1514 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1515 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1516 1517 /* put together the new matrix in MATSEQSBAIJ format */ 1518 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1519 B = *fact; 1520 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1521 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1522 1523 b = (Mat_SeqSBAIJ*)B->data; 1524 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1525 b->singlemalloc = PETSC_FALSE; 1526 /* the next line frees the default space generated by the Create() */ 1527 ierr = PetscFree(b->a);CHKERRQ(ierr); 1528 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1529 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1530 b->j = uj; 1531 b->i = ui; 1532 b->diag = 0; 1533 b->ilen = 0; 1534 b->imax = 0; 1535 b->row = perm; 1536 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1537 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1538 b->icol = perm; 1539 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1540 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1541 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1542 b->maxnz = b->nz = ui[am]; 1543 1544 B->factor = FACTOR_CHOLESKY; 1545 B->info.factor_mallocs = reallocs; 1546 B->info.fill_ratio_given = fill; 1547 if (ai[am] != 0) { 1548 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1549 } else { 1550 B->info.fill_ratio_needed = 0.0; 1551 } 1552 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1553 if (perm_identity){ 1554 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1555 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1556 } 1557 PetscFunctionReturn(0); 1558 } 1559