1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 23 24 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 25 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 26 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->m; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 223 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 225 (*fact)->factor = FACTOR_LU; 226 (*fact)->assembled = PETSC_TRUE; 227 228 b = (Mat_SeqAIJ*)(*fact)->data; 229 b->freedata = PETSC_TRUE; 230 b->sorted = PETSC_FALSE; 231 b->singlemalloc = PETSC_FALSE; 232 b->a = new_a; 233 b->j = new_j; 234 b->i = new_i; 235 b->ilen = 0; 236 b->imax = 0; 237 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 238 b->row = isirow; 239 b->col = iscolf; 240 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 241 b->maxnz = b->nz = new_i[n]; 242 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 243 (*fact)->info.factor_mallocs = 0; 244 245 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 246 247 af = ((double)b->nz)/((double)a->nz) + .001; 248 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr); 249 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 250 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 251 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 252 253 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 254 255 PetscFunctionReturn(0); 256 #endif 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 261 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 262 { 263 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 264 IS isicol; 265 PetscErrorCode ierr; 266 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 267 PetscInt *bi,*bj,*ajtmp; 268 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 269 PetscReal f; 270 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 271 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 272 PetscBT lnkbt; 273 274 PetscFunctionBegin; 275 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 276 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 277 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 278 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 279 280 /* get new row pointers */ 281 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 282 bi[0] = 0; 283 284 /* bdiag is location of diagonal in factor */ 285 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 286 bdiag[0] = 0; 287 288 /* linked list for storing column indices of the active row */ 289 nlnk = n + 1; 290 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 291 292 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 293 im = cols + n; 294 bi_ptr = (PetscInt**)(im + n); 295 296 /* initial FreeSpace size is f*(ai[n]+1) */ 297 f = info->fill; 298 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 299 current_space = free_space; 300 301 for (i=0; i<n; i++) { 302 /* copy previous fill into linked list */ 303 nzi = 0; 304 nnz = ai[r[i]+1] - ai[r[i]]; 305 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 306 ajtmp = aj + ai[r[i]]; 307 for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */ 308 ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 309 nzi += nlnk; 310 311 /* add pivot rows into linked list */ 312 row = lnk[n]; 313 while (row < i) { 314 nzbd = bdiag[row] - bi[row] + 1; 315 ajtmp = bi_ptr[row] + nzbd; 316 nnz = im[row] - nzbd; /* num of columns with row<indices<=i */ 317 im[row] = nzbd; 318 ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr); 319 nzi += nlnk; 320 im[row] += nzbd; /* update im[row]: num of cols with index<=i */ 321 322 row = lnk[row]; 323 } 324 325 bi[i+1] = bi[i] + nzi; 326 im[i] = nzi; 327 328 /* mark bdiag */ 329 nzbd = 0; 330 nnz = nzi; 331 k = lnk[n]; 332 while (nnz-- && k < i){ 333 nzbd++; 334 k = lnk[k]; 335 } 336 bdiag[i] = bi[i] + nzbd; 337 338 /* if free space is not available, make more free space */ 339 if (current_space->local_remaining<nzi) { 340 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 341 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 342 reallocs++; 343 } 344 345 /* copy data into free space, then initialize lnk */ 346 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 347 bi_ptr[i] = current_space->array; 348 current_space->array += nzi; 349 current_space->local_used += nzi; 350 current_space->local_remaining -= nzi; 351 } 352 #if defined(PETSC_USE_DEBUG) 353 if (ai[n] != 0) { 354 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 355 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 356 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %G or use \n",af));CHKERRQ(ierr); 357 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 358 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 359 } else { 360 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr); 361 } 362 #endif 363 364 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 365 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 366 367 /* destroy list of free space and other temporary array(s) */ 368 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 369 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 370 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 371 ierr = PetscFree(cols);CHKERRQ(ierr); 372 373 /* put together the new matrix */ 374 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 375 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 376 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 377 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 378 b = (Mat_SeqAIJ*)(*B)->data; 379 b->freedata = PETSC_TRUE; 380 b->singlemalloc = PETSC_FALSE; 381 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 382 b->j = bj; 383 b->i = bi; 384 b->diag = bdiag; 385 b->ilen = 0; 386 b->imax = 0; 387 b->row = isrow; 388 b->col = iscol; 389 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 390 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 391 b->icol = isicol; 392 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 393 394 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 395 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 396 b->maxnz = b->nz = bi[n] ; 397 398 (*B)->factor = FACTOR_LU; 399 (*B)->info.factor_mallocs = reallocs; 400 (*B)->info.fill_ratio_given = f; 401 402 if (ai[n] != 0) { 403 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 404 } else { 405 (*B)->info.fill_ratio_needed = 0.0; 406 } 407 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 408 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 409 PetscFunctionReturn(0); 410 } 411 412 /* ----------------------------------------------------------- */ 413 #undef __FUNCT__ 414 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 415 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 416 { 417 Mat C=*B; 418 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 419 IS isrow = b->row,isicol = b->icol; 420 PetscErrorCode ierr; 421 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 422 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 423 PetscInt *diag_offset = b->diag,diag,*pj; 424 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 425 PetscScalar d; 426 PetscReal rs; 427 LUShift_Ctx sctx; 428 PetscInt newshift; 429 430 PetscFunctionBegin; 431 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 432 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 433 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 434 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 435 rtmps = rtmp; ics = ic; 436 437 if (!a->diag) { 438 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 439 } 440 /* if both shift schemes are chosen by user, only use info->shiftpd */ 441 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 442 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 443 PetscInt *aai = a->i,*ddiag = a->diag; 444 sctx.shift_top = 0; 445 for (i=0; i<n; i++) { 446 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 447 d = (a->a)[ddiag[i]]; 448 rs = -PetscAbsScalar(d) - PetscRealPart(d); 449 v = a->a+aai[i]; 450 nz = aai[i+1] - aai[i]; 451 for (j=0; j<nz; j++) 452 rs += PetscAbsScalar(v[j]); 453 if (rs>sctx.shift_top) sctx.shift_top = rs; 454 } 455 if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12; 456 sctx.shift_top *= 1.1; 457 sctx.nshift_max = 5; 458 sctx.shift_lo = 0.; 459 sctx.shift_hi = 1.; 460 } 461 462 sctx.shift_amount = 0; 463 sctx.nshift = 0; 464 do { 465 sctx.lushift = PETSC_FALSE; 466 for (i=0; i<n; i++){ 467 nz = bi[i+1] - bi[i]; 468 bjtmp = bj + bi[i]; 469 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 470 471 /* load in initial (unfactored row) */ 472 nz = a->i[r[i]+1] - a->i[r[i]]; 473 ajtmp = a->j + a->i[r[i]]; 474 v = a->a + a->i[r[i]]; 475 for (j=0; j<nz; j++) { 476 rtmp[ics[ajtmp[j]]] = v[j]; 477 } 478 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 479 480 row = *bjtmp++; 481 while (row < i) { 482 pc = rtmp + row; 483 if (*pc != 0.0) { 484 pv = b->a + diag_offset[row]; 485 pj = b->j + diag_offset[row] + 1; 486 multiplier = *pc / *pv++; 487 *pc = multiplier; 488 nz = bi[row+1] - diag_offset[row] - 1; 489 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 490 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 491 } 492 row = *bjtmp++; 493 } 494 /* finished row so stick it into b->a */ 495 pv = b->a + bi[i] ; 496 pj = b->j + bi[i] ; 497 nz = bi[i+1] - bi[i]; 498 diag = diag_offset[i] - bi[i]; 499 rs = 0.0; 500 for (j=0; j<nz; j++) { 501 pv[j] = rtmps[pj[j]]; 502 if (j != diag) rs += PetscAbsScalar(pv[j]); 503 } 504 505 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 506 sctx.rs = rs; 507 sctx.pv = pv[diag]; 508 ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 509 if (newshift == 1){ 510 break; /* sctx.shift_amount is updated */ 511 } else if (newshift == -1){ 512 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 513 } 514 } 515 516 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 517 /* 518 * if no shift in this attempt & shifting & started shifting & can refine, 519 * then try lower shift 520 */ 521 sctx.shift_hi = info->shift_fraction; 522 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 523 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 524 sctx.lushift = PETSC_TRUE; 525 sctx.nshift++; 526 } 527 } while (sctx.lushift); 528 529 /* invert diagonal entries for simplier triangular solves */ 530 for (i=0; i<n; i++) { 531 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 532 } 533 534 ierr = PetscFree(rtmp);CHKERRQ(ierr); 535 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 536 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 537 C->factor = FACTOR_LU; 538 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 539 C->assembled = PETSC_TRUE; 540 ierr = PetscLogFlops(C->n);CHKERRQ(ierr); 541 if (sctx.nshift){ 542 if (info->shiftnz) { 543 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 544 } else if (info->shiftpd) { 545 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr); 546 } 547 } 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 553 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 554 { 555 PetscFunctionBegin; 556 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 557 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 558 PetscFunctionReturn(0); 559 } 560 561 562 /* ----------------------------------------------------------- */ 563 #undef __FUNCT__ 564 #define __FUNCT__ "MatLUFactor_SeqAIJ" 565 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 566 { 567 PetscErrorCode ierr; 568 Mat C; 569 570 PetscFunctionBegin; 571 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 572 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 573 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 574 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 575 PetscFunctionReturn(0); 576 } 577 /* ----------------------------------------------------------- */ 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatSolve_SeqAIJ" 580 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 581 { 582 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 583 IS iscol = a->col,isrow = a->row; 584 PetscErrorCode ierr; 585 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 586 PetscInt nz,*rout,*cout; 587 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 588 589 PetscFunctionBegin; 590 if (!n) PetscFunctionReturn(0); 591 592 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 593 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 594 tmp = a->solve_work; 595 596 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 597 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 598 599 /* forward solve the lower triangular */ 600 tmp[0] = b[*r++]; 601 tmps = tmp; 602 for (i=1; i<n; i++) { 603 v = aa + ai[i] ; 604 vi = aj + ai[i] ; 605 nz = a->diag[i] - ai[i]; 606 sum = b[*r++]; 607 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 608 tmp[i] = sum; 609 } 610 611 /* backward solve the upper triangular */ 612 for (i=n-1; i>=0; i--){ 613 v = aa + a->diag[i] + 1; 614 vi = aj + a->diag[i] + 1; 615 nz = ai[i+1] - a->diag[i] - 1; 616 sum = tmp[i]; 617 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 618 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 619 } 620 621 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 622 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 623 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 624 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 625 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 626 PetscFunctionReturn(0); 627 } 628 629 /* ----------------------------------------------------------- */ 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 632 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 633 { 634 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 635 PetscErrorCode ierr; 636 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 637 PetscScalar *x,*b,*aa = a->a; 638 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 639 PetscInt adiag_i,i,*vi,nz,ai_i; 640 PetscScalar *v,sum; 641 #endif 642 643 PetscFunctionBegin; 644 if (!n) PetscFunctionReturn(0); 645 646 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 647 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 648 649 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 650 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 651 #else 652 /* forward solve the lower triangular */ 653 x[0] = b[0]; 654 for (i=1; i<n; i++) { 655 ai_i = ai[i]; 656 v = aa + ai_i; 657 vi = aj + ai_i; 658 nz = adiag[i] - ai_i; 659 sum = b[i]; 660 while (nz--) sum -= *v++ * x[*vi++]; 661 x[i] = sum; 662 } 663 664 /* backward solve the upper triangular */ 665 for (i=n-1; i>=0; i--){ 666 adiag_i = adiag[i]; 667 v = aa + adiag_i + 1; 668 vi = aj + adiag_i + 1; 669 nz = ai[i+1] - adiag_i - 1; 670 sum = x[i]; 671 while (nz--) sum -= *v++ * x[*vi++]; 672 x[i] = sum*aa[adiag_i]; 673 } 674 #endif 675 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 676 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 677 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 678 PetscFunctionReturn(0); 679 } 680 681 #undef __FUNCT__ 682 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 683 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 684 { 685 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 686 IS iscol = a->col,isrow = a->row; 687 PetscErrorCode ierr; 688 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 689 PetscInt nz,*rout,*cout; 690 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 691 692 PetscFunctionBegin; 693 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 694 695 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 696 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 697 tmp = a->solve_work; 698 699 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 700 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 701 702 /* forward solve the lower triangular */ 703 tmp[0] = b[*r++]; 704 for (i=1; i<n; i++) { 705 v = aa + ai[i] ; 706 vi = aj + ai[i] ; 707 nz = a->diag[i] - ai[i]; 708 sum = b[*r++]; 709 while (nz--) sum -= *v++ * tmp[*vi++ ]; 710 tmp[i] = sum; 711 } 712 713 /* backward solve the upper triangular */ 714 for (i=n-1; i>=0; i--){ 715 v = aa + a->diag[i] + 1; 716 vi = aj + a->diag[i] + 1; 717 nz = ai[i+1] - a->diag[i] - 1; 718 sum = tmp[i]; 719 while (nz--) sum -= *v++ * tmp[*vi++ ]; 720 tmp[i] = sum*aa[a->diag[i]]; 721 x[*c--] += tmp[i]; 722 } 723 724 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 725 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 726 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 727 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 728 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 729 730 PetscFunctionReturn(0); 731 } 732 /* -------------------------------------------------------------------*/ 733 #undef __FUNCT__ 734 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 735 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 736 { 737 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 738 IS iscol = a->col,isrow = a->row; 739 PetscErrorCode ierr; 740 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 741 PetscInt nz,*rout,*cout,*diag = a->diag; 742 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 743 744 PetscFunctionBegin; 745 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 746 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 747 tmp = a->solve_work; 748 749 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 750 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 751 752 /* copy the b into temp work space according to permutation */ 753 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 754 755 /* forward solve the U^T */ 756 for (i=0; i<n; i++) { 757 v = aa + diag[i] ; 758 vi = aj + diag[i] + 1; 759 nz = ai[i+1] - diag[i] - 1; 760 s1 = tmp[i]; 761 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 762 while (nz--) { 763 tmp[*vi++ ] -= (*v++)*s1; 764 } 765 tmp[i] = s1; 766 } 767 768 /* backward solve the L^T */ 769 for (i=n-1; i>=0; i--){ 770 v = aa + diag[i] - 1 ; 771 vi = aj + diag[i] - 1 ; 772 nz = diag[i] - ai[i]; 773 s1 = tmp[i]; 774 while (nz--) { 775 tmp[*vi-- ] -= (*v--)*s1; 776 } 777 } 778 779 /* copy tmp into x according to permutation */ 780 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 781 782 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 783 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 784 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 785 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 786 787 ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr); 788 PetscFunctionReturn(0); 789 } 790 791 #undef __FUNCT__ 792 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 793 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 794 { 795 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 796 IS iscol = a->col,isrow = a->row; 797 PetscErrorCode ierr; 798 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 799 PetscInt nz,*rout,*cout,*diag = a->diag; 800 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 801 802 PetscFunctionBegin; 803 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 804 805 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 806 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 807 tmp = a->solve_work; 808 809 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 810 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 811 812 /* copy the b into temp work space according to permutation */ 813 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 814 815 /* forward solve the U^T */ 816 for (i=0; i<n; i++) { 817 v = aa + diag[i] ; 818 vi = aj + diag[i] + 1; 819 nz = ai[i+1] - diag[i] - 1; 820 tmp[i] *= *v++; 821 while (nz--) { 822 tmp[*vi++ ] -= (*v++)*tmp[i]; 823 } 824 } 825 826 /* backward solve the L^T */ 827 for (i=n-1; i>=0; i--){ 828 v = aa + diag[i] - 1 ; 829 vi = aj + diag[i] - 1 ; 830 nz = diag[i] - ai[i]; 831 while (nz--) { 832 tmp[*vi-- ] -= (*v--)*tmp[i]; 833 } 834 } 835 836 /* copy tmp into x according to permutation */ 837 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 838 839 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 840 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 841 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 842 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 843 844 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 /* ----------------------------------------------------------------*/ 848 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 849 850 #undef __FUNCT__ 851 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 852 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 853 { 854 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 855 IS isicol; 856 PetscErrorCode ierr; 857 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 858 PetscInt *bi,*cols,nnz,*cols_lvl; 859 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 860 PetscInt i,levels,diagonal_fill; 861 PetscTruth col_identity,row_identity; 862 PetscReal f; 863 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 864 PetscBT lnkbt; 865 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 866 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 867 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 868 869 PetscFunctionBegin; 870 f = info->fill; 871 levels = (PetscInt)info->levels; 872 diagonal_fill = (PetscInt)info->diagonal_fill; 873 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 874 875 /* special case that simply copies fill pattern */ 876 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 877 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 878 if (!levels && row_identity && col_identity) { 879 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 880 (*fact)->factor = FACTOR_LU; 881 b = (Mat_SeqAIJ*)(*fact)->data; 882 if (!b->diag) { 883 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 884 } 885 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 886 b->row = isrow; 887 b->col = iscol; 888 b->icol = isicol; 889 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 890 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 891 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 892 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 893 PetscFunctionReturn(0); 894 } 895 896 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 897 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 898 899 /* get new row pointers */ 900 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 901 bi[0] = 0; 902 /* bdiag is location of diagonal in factor */ 903 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 904 bdiag[0] = 0; 905 906 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 907 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 908 909 /* create a linked list for storing column indices of the active row */ 910 nlnk = n + 1; 911 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 912 913 /* initial FreeSpace size is f*(ai[n]+1) */ 914 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 915 current_space = free_space; 916 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 917 current_space_lvl = free_space_lvl; 918 919 for (i=0; i<n; i++) { 920 nzi = 0; 921 /* copy current row into linked list */ 922 nnz = ai[r[i]+1] - ai[r[i]]; 923 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 924 cols = aj + ai[r[i]]; 925 lnk[i] = -1; /* marker to indicate if diagonal exists */ 926 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 927 nzi += nlnk; 928 929 /* make sure diagonal entry is included */ 930 if (diagonal_fill && lnk[i] == -1) { 931 fm = n; 932 while (lnk[fm] < i) fm = lnk[fm]; 933 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 934 lnk[fm] = i; 935 lnk_lvl[i] = 0; 936 nzi++; dcount++; 937 } 938 939 /* add pivot rows into the active row */ 940 nzbd = 0; 941 prow = lnk[n]; 942 while (prow < i) { 943 nnz = bdiag[prow]; 944 cols = bj_ptr[prow] + nnz + 1; 945 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 946 nnz = bi[prow+1] - bi[prow] - nnz - 1; 947 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 948 nzi += nlnk; 949 prow = lnk[prow]; 950 nzbd++; 951 } 952 bdiag[i] = nzbd; 953 bi[i+1] = bi[i] + nzi; 954 955 /* if free space is not available, make more free space */ 956 if (current_space->local_remaining<nzi) { 957 nnz = nzi*(n - i); /* estimated and max additional space needed */ 958 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 959 ierr = GetMoreSpace(nnz,¤t_space_lvl);CHKERRQ(ierr); 960 reallocs++; 961 } 962 963 /* copy data into free_space and free_space_lvl, then initialize lnk */ 964 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 965 bj_ptr[i] = current_space->array; 966 bjlvl_ptr[i] = current_space_lvl->array; 967 968 /* make sure the active row i has diagonal entry */ 969 if (*(bj_ptr[i]+bdiag[i]) != i) { 970 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 971 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 972 } 973 974 current_space->array += nzi; 975 current_space->local_used += nzi; 976 current_space->local_remaining -= nzi; 977 current_space_lvl->array += nzi; 978 current_space_lvl->local_used += nzi; 979 current_space_lvl->local_remaining -= nzi; 980 } 981 982 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 983 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 984 985 /* destroy list of free space and other temporary arrays */ 986 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 987 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 988 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 989 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 990 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 991 992 #if defined(PETSC_USE_DEBUG) 993 { 994 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 995 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 996 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 997 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr); 998 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 999 if (diagonal_fill) { 1000 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr); 1001 } 1002 } 1003 #endif 1004 1005 /* put together the new matrix */ 1006 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1007 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1008 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1009 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1010 b = (Mat_SeqAIJ*)(*fact)->data; 1011 b->freedata = PETSC_TRUE; 1012 b->singlemalloc = PETSC_FALSE; 1013 len = (bi[n] )*sizeof(PetscScalar); 1014 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1015 b->j = bj; 1016 b->i = bi; 1017 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1018 b->diag = bdiag; 1019 b->ilen = 0; 1020 b->imax = 0; 1021 b->row = isrow; 1022 b->col = iscol; 1023 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1024 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1025 b->icol = isicol; 1026 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1027 /* In b structure: Free imax, ilen, old a, old j. 1028 Allocate bdiag, solve_work, new a, new j */ 1029 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1030 b->maxnz = b->nz = bi[n] ; 1031 (*fact)->factor = FACTOR_LU; 1032 (*fact)->info.factor_mallocs = reallocs; 1033 (*fact)->info.fill_ratio_given = f; 1034 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1035 1036 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1037 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1038 1039 PetscFunctionReturn(0); 1040 } 1041 1042 #include "src/mat/impls/sbaij/seq/sbaij.h" 1043 #undef __FUNCT__ 1044 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1045 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1046 { 1047 Mat C = *B; 1048 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1049 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1050 IS ip=b->row; 1051 PetscErrorCode ierr; 1052 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1053 PetscInt *ai=a->i,*aj=a->j; 1054 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1055 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1056 PetscReal zeropivot,rs,shiftnz; 1057 PetscTruth shiftpd; 1058 ChShift_Ctx sctx; 1059 PetscInt newshift; 1060 1061 PetscFunctionBegin; 1062 shiftnz = info->shiftnz; 1063 shiftpd = info->shiftpd; 1064 zeropivot = info->zeropivot; 1065 1066 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1067 1068 /* initialization */ 1069 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1070 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1071 jl = il + mbs; 1072 rtmp = (MatScalar*)(jl + mbs); 1073 1074 sctx.shift_amount = 0; 1075 sctx.nshift = 0; 1076 do { 1077 sctx.chshift = PETSC_FALSE; 1078 for (i=0; i<mbs; i++) { 1079 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1080 } 1081 1082 for (k = 0; k<mbs; k++){ 1083 bval = ba + bi[k]; 1084 /* initialize k-th row by the perm[k]-th row of A */ 1085 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1086 for (j = jmin; j < jmax; j++){ 1087 col = rip[aj[j]]; 1088 if (col >= k){ /* only take upper triangular entry */ 1089 rtmp[col] = aa[j]; 1090 *bval++ = 0.0; /* for in-place factorization */ 1091 } 1092 } 1093 /* shift the diagonal of the matrix */ 1094 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1095 1096 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1097 dk = rtmp[k]; 1098 i = jl[k]; /* first row to be added to k_th row */ 1099 1100 while (i < k){ 1101 nexti = jl[i]; /* next row to be added to k_th row */ 1102 1103 /* compute multiplier, update diag(k) and U(i,k) */ 1104 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1105 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1106 dk += uikdi*ba[ili]; 1107 ba[ili] = uikdi; /* -U(i,k) */ 1108 1109 /* add multiple of row i to k-th row */ 1110 jmin = ili + 1; jmax = bi[i+1]; 1111 if (jmin < jmax){ 1112 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1113 /* update il and jl for row i */ 1114 il[i] = jmin; 1115 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1116 } 1117 i = nexti; 1118 } 1119 1120 /* shift the diagonals when zero pivot is detected */ 1121 /* compute rs=sum of abs(off-diagonal) */ 1122 rs = 0.0; 1123 jmin = bi[k]+1; 1124 nz = bi[k+1] - jmin; 1125 if (nz){ 1126 bcol = bj + jmin; 1127 while (nz--){ 1128 rs += PetscAbsScalar(rtmp[*bcol]); 1129 bcol++; 1130 } 1131 } 1132 1133 sctx.rs = rs; 1134 sctx.pv = dk; 1135 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1136 if (newshift == 1){ 1137 break; /* sctx.shift_amount is updated */ 1138 } else if (newshift == -1){ 1139 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1140 } 1141 1142 /* copy data into U(k,:) */ 1143 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1144 jmin = bi[k]+1; jmax = bi[k+1]; 1145 if (jmin < jmax) { 1146 for (j=jmin; j<jmax; j++){ 1147 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1148 } 1149 /* add the k-th row into il and jl */ 1150 il[k] = jmin; 1151 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1152 } 1153 } 1154 } while (sctx.chshift); 1155 ierr = PetscFree(il);CHKERRQ(ierr); 1156 1157 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1158 C->factor = FACTOR_CHOLESKY; 1159 C->assembled = PETSC_TRUE; 1160 C->preallocated = PETSC_TRUE; 1161 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1162 if (sctx.nshift){ 1163 if (shiftnz) { 1164 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1165 } else if (shiftpd) { 1166 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1167 } 1168 } 1169 PetscFunctionReturn(0); 1170 } 1171 1172 #undef __FUNCT__ 1173 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1174 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1175 { 1176 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1177 Mat_SeqSBAIJ *b; 1178 Mat B; 1179 PetscErrorCode ierr; 1180 PetscTruth perm_identity; 1181 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1182 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1183 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1184 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1185 PetscReal fill=info->fill,levels=info->levels; 1186 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1187 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1188 PetscBT lnkbt; 1189 1190 PetscFunctionBegin; 1191 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1192 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1193 1194 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1195 ui[0] = 0; 1196 1197 /* special case that simply copies fill pattern */ 1198 if (!levels && perm_identity) { 1199 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1200 for (i=0; i<am; i++) { 1201 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1202 } 1203 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1204 cols = uj; 1205 for (i=0; i<am; i++) { 1206 aj = a->j + a->diag[i]; 1207 ncols = ui[i+1] - ui[i]; 1208 for (j=0; j<ncols; j++) *cols++ = *aj++; 1209 } 1210 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1211 /* initialization */ 1212 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1213 1214 /* jl: linked list for storing indices of the pivot rows 1215 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1216 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1217 il = jl + am; 1218 uj_ptr = (PetscInt**)(il + am); 1219 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1220 for (i=0; i<am; i++){ 1221 jl[i] = am; il[i] = 0; 1222 } 1223 1224 /* create and initialize a linked list for storing column indices of the active row k */ 1225 nlnk = am + 1; 1226 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1227 1228 /* initial FreeSpace size is fill*(ai[am]+1) */ 1229 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1230 current_space = free_space; 1231 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1232 current_space_lvl = free_space_lvl; 1233 1234 for (k=0; k<am; k++){ /* for each active row k */ 1235 /* initialize lnk by the column indices of row rip[k] of A */ 1236 nzk = 0; 1237 ncols = ai[rip[k]+1] - ai[rip[k]]; 1238 ncols_upper = 0; 1239 for (j=0; j<ncols; j++){ 1240 i = *(aj + ai[rip[k]] + j); 1241 if (rip[i] >= k){ /* only take upper triangular entry */ 1242 ajtmp[ncols_upper] = i; 1243 ncols_upper++; 1244 } 1245 } 1246 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1247 nzk += nlnk; 1248 1249 /* update lnk by computing fill-in for each pivot row to be merged in */ 1250 prow = jl[k]; /* 1st pivot row */ 1251 1252 while (prow < k){ 1253 nextprow = jl[prow]; 1254 1255 /* merge prow into k-th row */ 1256 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1257 jmax = ui[prow+1]; 1258 ncols = jmax-jmin; 1259 i = jmin - ui[prow]; 1260 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1261 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1262 j = *(uj - 1); 1263 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1264 nzk += nlnk; 1265 1266 /* update il and jl for prow */ 1267 if (jmin < jmax){ 1268 il[prow] = jmin; 1269 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1270 } 1271 prow = nextprow; 1272 } 1273 1274 /* if free space is not available, make more free space */ 1275 if (current_space->local_remaining<nzk) { 1276 i = am - k + 1; /* num of unfactored rows */ 1277 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1278 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1279 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1280 reallocs++; 1281 } 1282 1283 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1284 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1285 1286 /* add the k-th row into il and jl */ 1287 if (nzk > 1){ 1288 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1289 jl[k] = jl[i]; jl[i] = k; 1290 il[k] = ui[k] + 1; 1291 } 1292 uj_ptr[k] = current_space->array; 1293 uj_lvl_ptr[k] = current_space_lvl->array; 1294 1295 current_space->array += nzk; 1296 current_space->local_used += nzk; 1297 current_space->local_remaining -= nzk; 1298 1299 current_space_lvl->array += nzk; 1300 current_space_lvl->local_used += nzk; 1301 current_space_lvl->local_remaining -= nzk; 1302 1303 ui[k+1] = ui[k] + nzk; 1304 } 1305 1306 #if defined(PETSC_USE_DEBUG) 1307 if (ai[am] != 0) { 1308 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1309 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1310 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1311 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1312 } else { 1313 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1314 } 1315 #endif 1316 1317 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1318 ierr = PetscFree(jl);CHKERRQ(ierr); 1319 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1320 1321 /* destroy list of free space and other temporary array(s) */ 1322 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1323 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1324 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1325 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1326 1327 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1328 1329 /* put together the new matrix in MATSEQSBAIJ format */ 1330 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1331 B = *fact; 1332 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1333 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1334 1335 b = (Mat_SeqSBAIJ*)B->data; 1336 b->singlemalloc = PETSC_FALSE; 1337 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1338 b->j = uj; 1339 b->i = ui; 1340 b->diag = 0; 1341 b->ilen = 0; 1342 b->imax = 0; 1343 b->row = perm; 1344 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1345 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1346 b->icol = perm; 1347 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1348 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1349 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1350 b->maxnz = b->nz = ui[am]; 1351 1352 B->factor = FACTOR_CHOLESKY; 1353 B->info.factor_mallocs = reallocs; 1354 B->info.fill_ratio_given = fill; 1355 if (ai[am] != 0) { 1356 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1357 } else { 1358 B->info.fill_ratio_needed = 0.0; 1359 } 1360 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1361 if (perm_identity){ 1362 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1363 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1364 } 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1370 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1371 { 1372 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1373 Mat_SeqSBAIJ *b; 1374 Mat B; 1375 PetscErrorCode ierr; 1376 PetscTruth perm_identity; 1377 PetscReal fill = info->fill; 1378 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1379 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1380 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1381 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1382 PetscBT lnkbt; 1383 IS iperm; 1384 1385 PetscFunctionBegin; 1386 /* check whether perm is the identity mapping */ 1387 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1388 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1389 1390 if (!perm_identity){ 1391 /* check if perm is symmetric! */ 1392 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1393 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1394 for (i=0; i<am; i++) { 1395 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1396 } 1397 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1398 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1399 } 1400 1401 /* initialization */ 1402 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1403 ui[0] = 0; 1404 1405 /* jl: linked list for storing indices of the pivot rows 1406 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1407 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1408 il = jl + am; 1409 cols = il + am; 1410 ui_ptr = (PetscInt**)(cols + am); 1411 for (i=0; i<am; i++){ 1412 jl[i] = am; il[i] = 0; 1413 } 1414 1415 /* create and initialize a linked list for storing column indices of the active row k */ 1416 nlnk = am + 1; 1417 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1418 1419 /* initial FreeSpace size is fill*(ai[am]+1) */ 1420 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1421 current_space = free_space; 1422 1423 for (k=0; k<am; k++){ /* for each active row k */ 1424 /* initialize lnk by the column indices of row rip[k] of A */ 1425 nzk = 0; 1426 ncols = ai[rip[k]+1] - ai[rip[k]]; 1427 ncols_upper = 0; 1428 for (j=0; j<ncols; j++){ 1429 i = rip[*(aj + ai[rip[k]] + j)]; 1430 if (i >= k){ /* only take upper triangular entry */ 1431 cols[ncols_upper] = i; 1432 ncols_upper++; 1433 } 1434 } 1435 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1436 nzk += nlnk; 1437 1438 /* update lnk by computing fill-in for each pivot row to be merged in */ 1439 prow = jl[k]; /* 1st pivot row */ 1440 1441 while (prow < k){ 1442 nextprow = jl[prow]; 1443 /* merge prow into k-th row */ 1444 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1445 jmax = ui[prow+1]; 1446 ncols = jmax-jmin; 1447 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1448 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1449 nzk += nlnk; 1450 1451 /* update il and jl for prow */ 1452 if (jmin < jmax){ 1453 il[prow] = jmin; 1454 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1455 } 1456 prow = nextprow; 1457 } 1458 1459 /* if free space is not available, make more free space */ 1460 if (current_space->local_remaining<nzk) { 1461 i = am - k + 1; /* num of unfactored rows */ 1462 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1463 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1464 reallocs++; 1465 } 1466 1467 /* copy data into free space, then initialize lnk */ 1468 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1469 1470 /* add the k-th row into il and jl */ 1471 if (nzk-1 > 0){ 1472 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1473 jl[k] = jl[i]; jl[i] = k; 1474 il[k] = ui[k] + 1; 1475 } 1476 ui_ptr[k] = current_space->array; 1477 current_space->array += nzk; 1478 current_space->local_used += nzk; 1479 current_space->local_remaining -= nzk; 1480 1481 ui[k+1] = ui[k] + nzk; 1482 } 1483 1484 #if defined(PETSC_USE_DEBUG) 1485 if (ai[am] != 0) { 1486 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1487 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1488 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1489 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1490 } else { 1491 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1492 } 1493 #endif 1494 1495 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1496 ierr = PetscFree(jl);CHKERRQ(ierr); 1497 1498 /* destroy list of free space and other temporary array(s) */ 1499 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1500 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1501 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1502 1503 /* put together the new matrix in MATSEQSBAIJ format */ 1504 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1505 B = *fact; 1506 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1507 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1508 1509 b = (Mat_SeqSBAIJ*)B->data; 1510 b->singlemalloc = PETSC_FALSE; 1511 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1512 b->j = uj; 1513 b->i = ui; 1514 b->diag = 0; 1515 b->ilen = 0; 1516 b->imax = 0; 1517 b->row = perm; 1518 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1519 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1520 b->icol = perm; 1521 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1522 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1523 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1524 b->maxnz = b->nz = ui[am]; 1525 1526 B->factor = FACTOR_CHOLESKY; 1527 B->info.factor_mallocs = reallocs; 1528 B->info.fill_ratio_given = fill; 1529 if (ai[am] != 0) { 1530 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1531 } else { 1532 B->info.fill_ratio_needed = 0.0; 1533 } 1534 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1535 if (perm_identity){ 1536 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1537 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1538 } 1539 PetscFunctionReturn(0); 1540 } 1541