1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 23 24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 28 #endif 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 32 /* ------------------------------------------------------------ 33 34 This interface was contribed by Tony Caola 35 36 This routine is an interface to the pivoting drop-tolerance 37 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 38 SPARSEKIT2. 39 40 The SPARSEKIT2 routines used here are covered by the GNU 41 copyright; see the file gnu in this directory. 42 43 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 44 help in getting this routine ironed out. 45 46 The major drawback to this routine is that if info->fill is 47 not large enough it fails rather than allocating more space; 48 this can be fixed by hacking/improving the f2c version of 49 Yousef Saad's code. 50 51 ------------------------------------------------------------ 52 */ 53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 54 { 55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 56 PetscFunctionBegin; 57 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 58 You can obtain the drop tolerance routines by installing PETSc from\n\ 59 www.mcs.anl.gov/petsc\n"); 60 #else 61 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 62 IS iscolf,isicol,isirow; 63 PetscTruth reorder; 64 PetscErrorCode ierr,sierr; 65 PetscInt *c,*r,*ic,i,n = A->m; 66 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 67 PetscInt *ordcol,*iwk,*iperm,*jw; 68 PetscInt jmax,lfill,job,*o_i,*o_j; 69 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 70 PetscReal af; 71 72 PetscFunctionBegin; 73 74 if (info->dt == PETSC_DEFAULT) info->dt = .005; 75 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 76 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 77 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 78 lfill = (PetscInt)(info->dtcount/2.0); 79 jmax = (PetscInt)(info->fill*a->nz); 80 81 82 /* ------------------------------------------------------------ 83 If reorder=.TRUE., then the original matrix has to be 84 reordered to reflect the user selected ordering scheme, and 85 then de-reordered so it is in it's original format. 86 Because Saad's dperm() is NOT in place, we have to copy 87 the original matrix and allocate more storage. . . 88 ------------------------------------------------------------ 89 */ 90 91 /* set reorder to true if either isrow or iscol is not identity */ 92 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 93 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 94 reorder = PetscNot(reorder); 95 96 97 /* storage for ilu factor */ 98 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 100 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 101 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 102 103 /* ------------------------------------------------------------ 104 Make sure that everything is Fortran formatted (1-Based) 105 ------------------------------------------------------------ 106 */ 107 for (i=old_i[0];i<old_i[n];i++) { 108 old_j[i]++; 109 } 110 for(i=0;i<n+1;i++) { 111 old_i[i]++; 112 }; 113 114 115 if (reorder) { 116 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 117 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 r[i] = r[i]+1; 120 c[i] = c[i]+1; 121 } 122 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 123 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 124 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 125 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 126 for (i=0;i<n;i++) { 127 r[i] = r[i]-1; 128 c[i] = c[i]-1; 129 } 130 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 131 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 132 o_a = old_a2; 133 o_j = old_j2; 134 o_i = old_i2; 135 } else { 136 o_a = old_a; 137 o_j = old_j; 138 o_i = old_i; 139 } 140 141 /* ------------------------------------------------------------ 142 Call Saad's ilutp() routine to generate the factorization 143 ------------------------------------------------------------ 144 */ 145 146 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 147 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 148 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 149 150 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 151 if (sierr) { 152 switch (sierr) { 153 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 154 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 155 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 156 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 157 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 158 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 159 } 160 } 161 162 ierr = PetscFree(w);CHKERRQ(ierr); 163 ierr = PetscFree(jw);CHKERRQ(ierr); 164 165 /* ------------------------------------------------------------ 166 Saad's routine gives the result in Modified Sparse Row (msr) 167 Convert to Compressed Sparse Row format (csr) 168 ------------------------------------------------------------ 169 */ 170 171 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 172 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 173 174 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 175 176 ierr = PetscFree(iwk);CHKERRQ(ierr); 177 ierr = PetscFree(wk);CHKERRQ(ierr); 178 179 if (reorder) { 180 ierr = PetscFree(old_a2);CHKERRQ(ierr); 181 ierr = PetscFree(old_j2);CHKERRQ(ierr); 182 ierr = PetscFree(old_i2);CHKERRQ(ierr); 183 } else { 184 /* fix permutation of old_j that the factorization introduced */ 185 for (i=old_i[0]; i<old_i[n]; i++) { 186 old_j[i-1] = iperm[old_j[i-1]-1]; 187 } 188 } 189 190 /* get rid of the shift to indices starting at 1 */ 191 for (i=0; i<n+1; i++) { 192 old_i[i]--; 193 } 194 for (i=old_i[0];i<old_i[n];i++) { 195 old_j[i]--; 196 } 197 198 /* Make the factored matrix 0-based */ 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 206 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 207 /*-- permute the right-hand-side and solution vectors --*/ 208 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 209 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 210 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 211 for(i=0; i<n; i++) { 212 ordcol[i] = ic[iperm[i]-1]; 213 }; 214 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 215 ierr = ISDestroy(isicol);CHKERRQ(ierr); 216 217 ierr = PetscFree(iperm);CHKERRQ(ierr); 218 219 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 220 ierr = PetscFree(ordcol);CHKERRQ(ierr); 221 222 /*----- put together the new matrix -----*/ 223 224 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 225 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 226 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 227 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 228 (*fact)->factor = FACTOR_LU; 229 (*fact)->assembled = PETSC_TRUE; 230 231 b = (Mat_SeqAIJ*)(*fact)->data; 232 b->freedata = PETSC_TRUE; 233 b->sorted = PETSC_FALSE; 234 b->singlemalloc = PETSC_FALSE; 235 b->a = new_a; 236 b->j = new_j; 237 b->i = new_i; 238 b->ilen = 0; 239 b->imax = 0; 240 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 241 b->row = isirow; 242 b->col = iscolf; 243 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 244 b->maxnz = b->nz = new_i[n]; 245 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 246 (*fact)->info.factor_mallocs = 0; 247 248 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 249 250 af = ((double)b->nz)/((double)a->nz) + .001; 251 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr); 252 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 253 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 254 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 255 256 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 257 258 PetscFunctionReturn(0); 259 #endif 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 265 { 266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 267 IS isicol; 268 PetscErrorCode ierr; 269 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 270 PetscInt *bi,*bj,*ajtmp; 271 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 272 PetscReal f; 273 PetscInt nlnk,*lnk,k,**bi_ptr; 274 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 275 PetscBT lnkbt; 276 277 PetscFunctionBegin; 278 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 279 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 280 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 281 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 282 283 /* get new row pointers */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 285 bi[0] = 0; 286 287 /* bdiag is location of diagonal in factor */ 288 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 289 bdiag[0] = 0; 290 291 /* linked list for storing column indices of the active row */ 292 nlnk = n + 1; 293 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 294 295 ierr = PetscMalloc((n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&im);CHKERRQ(ierr); 296 bi_ptr = (PetscInt**)(im + n); 297 298 /* initial FreeSpace size is f*(ai[n]+1) */ 299 f = info->fill; 300 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 301 current_space = free_space; 302 303 for (i=0; i<n; i++) { 304 /* copy previous fill into linked list */ 305 nzi = 0; 306 nnz = ai[r[i]+1] - ai[r[i]]; 307 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 308 ajtmp = aj + ai[r[i]]; 309 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 310 nzi += nlnk; 311 312 /* add pivot rows into linked list */ 313 row = lnk[n]; 314 while (row < i) { 315 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 316 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 317 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 318 nzi += nlnk; 319 row = lnk[row]; 320 } 321 bi[i+1] = bi[i] + nzi; 322 im[i] = nzi; 323 324 /* mark bdiag */ 325 nzbd = 0; 326 nnz = nzi; 327 k = lnk[n]; 328 while (nnz-- && k < i){ 329 nzbd++; 330 k = lnk[k]; 331 } 332 bdiag[i] = bi[i] + nzbd; 333 334 /* if free space is not available, make more free space */ 335 if (current_space->local_remaining<nzi) { 336 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 337 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 338 reallocs++; 339 } 340 341 /* copy data into free space, then initialize lnk */ 342 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 343 bi_ptr[i] = current_space->array; 344 current_space->array += nzi; 345 current_space->local_used += nzi; 346 current_space->local_remaining -= nzi; 347 } 348 #if defined(PETSC_USE_DEBUG) 349 if (ai[n] != 0) { 350 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 351 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 352 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %G or use \n",af));CHKERRQ(ierr); 353 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 354 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 355 } else { 356 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr); 357 } 358 #endif 359 360 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 361 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 362 363 /* destroy list of free space and other temporary array(s) */ 364 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 365 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 366 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 367 ierr = PetscFree(im);CHKERRQ(ierr); 368 369 /* put together the new matrix */ 370 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 371 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 372 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 373 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 374 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 375 b = (Mat_SeqAIJ*)(*B)->data; 376 b->freedata = PETSC_TRUE; 377 b->singlemalloc = PETSC_FALSE; 378 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 379 b->j = bj; 380 b->i = bi; 381 b->diag = bdiag; 382 b->ilen = 0; 383 b->imax = 0; 384 b->row = isrow; 385 b->col = iscol; 386 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 387 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 388 b->icol = isicol; 389 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 390 391 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 392 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 393 b->maxnz = b->nz = bi[n] ; 394 395 (*B)->factor = FACTOR_LU; 396 (*B)->info.factor_mallocs = reallocs; 397 (*B)->info.fill_ratio_given = f; 398 399 if (ai[n] != 0) { 400 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 401 } else { 402 (*B)->info.fill_ratio_needed = 0.0; 403 } 404 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 405 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 406 PetscFunctionReturn(0); 407 } 408 409 /* ----------------------------------------------------------- */ 410 #undef __FUNCT__ 411 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 412 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 413 { 414 Mat C=*B; 415 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 416 IS isrow = b->row,isicol = b->icol; 417 PetscErrorCode ierr; 418 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 419 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 420 PetscInt *diag_offset = b->diag,diag,*pj; 421 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 422 PetscScalar d; 423 PetscReal rs; 424 LUShift_Ctx sctx; 425 PetscInt newshift; 426 427 PetscFunctionBegin; 428 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 429 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 430 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 431 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 432 rtmps = rtmp; ics = ic; 433 434 if (!a->diag) { 435 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 436 } 437 /* if both shift schemes are chosen by user, only use info->shiftpd */ 438 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 439 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 440 PetscInt *aai = a->i,*ddiag = a->diag; 441 sctx.shift_top = 0; 442 for (i=0; i<n; i++) { 443 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 444 d = (a->a)[ddiag[i]]; 445 rs = -PetscAbsScalar(d) - PetscRealPart(d); 446 v = a->a+aai[i]; 447 nz = aai[i+1] - aai[i]; 448 for (j=0; j<nz; j++) 449 rs += PetscAbsScalar(v[j]); 450 if (rs>sctx.shift_top) sctx.shift_top = rs; 451 } 452 if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12; 453 sctx.shift_top *= 1.1; 454 sctx.nshift_max = 5; 455 sctx.shift_lo = 0.; 456 sctx.shift_hi = 1.; 457 } 458 459 sctx.shift_amount = 0; 460 sctx.nshift = 0; 461 do { 462 sctx.lushift = PETSC_FALSE; 463 for (i=0; i<n; i++){ 464 nz = bi[i+1] - bi[i]; 465 bjtmp = bj + bi[i]; 466 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 467 468 /* load in initial (unfactored row) */ 469 nz = a->i[r[i]+1] - a->i[r[i]]; 470 ajtmp = a->j + a->i[r[i]]; 471 v = a->a + a->i[r[i]]; 472 for (j=0; j<nz; j++) { 473 rtmp[ics[ajtmp[j]]] = v[j]; 474 } 475 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 476 477 row = *bjtmp++; 478 while (row < i) { 479 pc = rtmp + row; 480 if (*pc != 0.0) { 481 pv = b->a + diag_offset[row]; 482 pj = b->j + diag_offset[row] + 1; 483 multiplier = *pc / *pv++; 484 *pc = multiplier; 485 nz = bi[row+1] - diag_offset[row] - 1; 486 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 487 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 488 } 489 row = *bjtmp++; 490 } 491 /* finished row so stick it into b->a */ 492 pv = b->a + bi[i] ; 493 pj = b->j + bi[i] ; 494 nz = bi[i+1] - bi[i]; 495 diag = diag_offset[i] - bi[i]; 496 rs = 0.0; 497 for (j=0; j<nz; j++) { 498 pv[j] = rtmps[pj[j]]; 499 if (j != diag) rs += PetscAbsScalar(pv[j]); 500 } 501 502 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 503 sctx.rs = rs; 504 sctx.pv = pv[diag]; 505 ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 506 if (newshift == 1){ 507 break; /* sctx.shift_amount is updated */ 508 } else if (newshift == -1){ 509 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 510 } 511 } 512 513 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 514 /* 515 * if no shift in this attempt & shifting & started shifting & can refine, 516 * then try lower shift 517 */ 518 sctx.shift_hi = info->shift_fraction; 519 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 520 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 521 sctx.lushift = PETSC_TRUE; 522 sctx.nshift++; 523 } 524 } while (sctx.lushift); 525 526 /* invert diagonal entries for simplier triangular solves */ 527 for (i=0; i<n; i++) { 528 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 529 } 530 531 ierr = PetscFree(rtmp);CHKERRQ(ierr); 532 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 533 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 534 C->factor = FACTOR_LU; 535 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 536 C->assembled = PETSC_TRUE; 537 ierr = PetscLogFlops(C->n);CHKERRQ(ierr); 538 if (sctx.nshift){ 539 if (info->shiftnz) { 540 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 541 } else if (info->shiftpd) { 542 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr); 543 } 544 } 545 PetscFunctionReturn(0); 546 } 547 548 #undef __FUNCT__ 549 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 550 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 551 { 552 PetscFunctionBegin; 553 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 554 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 555 PetscFunctionReturn(0); 556 } 557 558 559 /* ----------------------------------------------------------- */ 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatLUFactor_SeqAIJ" 562 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 563 { 564 PetscErrorCode ierr; 565 Mat C; 566 567 PetscFunctionBegin; 568 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 569 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 570 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 571 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 /* ----------------------------------------------------------- */ 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatSolve_SeqAIJ" 577 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 578 { 579 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 580 IS iscol = a->col,isrow = a->row; 581 PetscErrorCode ierr; 582 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 583 PetscInt nz,*rout,*cout; 584 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 585 586 PetscFunctionBegin; 587 if (!n) PetscFunctionReturn(0); 588 589 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 590 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 591 tmp = a->solve_work; 592 593 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 594 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 595 596 /* forward solve the lower triangular */ 597 tmp[0] = b[*r++]; 598 tmps = tmp; 599 for (i=1; i<n; i++) { 600 v = aa + ai[i] ; 601 vi = aj + ai[i] ; 602 nz = a->diag[i] - ai[i]; 603 sum = b[*r++]; 604 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 605 tmp[i] = sum; 606 } 607 608 /* backward solve the upper triangular */ 609 for (i=n-1; i>=0; i--){ 610 v = aa + a->diag[i] + 1; 611 vi = aj + a->diag[i] + 1; 612 nz = ai[i+1] - a->diag[i] - 1; 613 sum = tmp[i]; 614 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 615 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 616 } 617 618 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 619 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 620 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 621 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 622 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 /* ----------------------------------------------------------- */ 627 #undef __FUNCT__ 628 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 629 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 630 { 631 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 632 PetscErrorCode ierr; 633 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 634 PetscScalar *x,*b,*aa = a->a; 635 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 636 PetscInt adiag_i,i,*vi,nz,ai_i; 637 PetscScalar *v,sum; 638 #endif 639 640 PetscFunctionBegin; 641 if (!n) PetscFunctionReturn(0); 642 643 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 644 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 645 646 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 647 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 648 #else 649 /* forward solve the lower triangular */ 650 x[0] = b[0]; 651 for (i=1; i<n; i++) { 652 ai_i = ai[i]; 653 v = aa + ai_i; 654 vi = aj + ai_i; 655 nz = adiag[i] - ai_i; 656 sum = b[i]; 657 while (nz--) sum -= *v++ * x[*vi++]; 658 x[i] = sum; 659 } 660 661 /* backward solve the upper triangular */ 662 for (i=n-1; i>=0; i--){ 663 adiag_i = adiag[i]; 664 v = aa + adiag_i + 1; 665 vi = aj + adiag_i + 1; 666 nz = ai[i+1] - adiag_i - 1; 667 sum = x[i]; 668 while (nz--) sum -= *v++ * x[*vi++]; 669 x[i] = sum*aa[adiag_i]; 670 } 671 #endif 672 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 673 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 674 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 675 PetscFunctionReturn(0); 676 } 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 680 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 681 { 682 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 683 IS iscol = a->col,isrow = a->row; 684 PetscErrorCode ierr; 685 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 686 PetscInt nz,*rout,*cout; 687 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 688 689 PetscFunctionBegin; 690 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 691 692 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 693 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 694 tmp = a->solve_work; 695 696 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 697 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 698 699 /* forward solve the lower triangular */ 700 tmp[0] = b[*r++]; 701 for (i=1; i<n; i++) { 702 v = aa + ai[i] ; 703 vi = aj + ai[i] ; 704 nz = a->diag[i] - ai[i]; 705 sum = b[*r++]; 706 while (nz--) sum -= *v++ * tmp[*vi++ ]; 707 tmp[i] = sum; 708 } 709 710 /* backward solve the upper triangular */ 711 for (i=n-1; i>=0; i--){ 712 v = aa + a->diag[i] + 1; 713 vi = aj + a->diag[i] + 1; 714 nz = ai[i+1] - a->diag[i] - 1; 715 sum = tmp[i]; 716 while (nz--) sum -= *v++ * tmp[*vi++ ]; 717 tmp[i] = sum*aa[a->diag[i]]; 718 x[*c--] += tmp[i]; 719 } 720 721 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 722 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 723 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 724 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 725 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 726 727 PetscFunctionReturn(0); 728 } 729 /* -------------------------------------------------------------------*/ 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 732 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 733 { 734 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 735 IS iscol = a->col,isrow = a->row; 736 PetscErrorCode ierr; 737 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 738 PetscInt nz,*rout,*cout,*diag = a->diag; 739 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 740 741 PetscFunctionBegin; 742 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 743 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 744 tmp = a->solve_work; 745 746 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 747 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 748 749 /* copy the b into temp work space according to permutation */ 750 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 751 752 /* forward solve the U^T */ 753 for (i=0; i<n; i++) { 754 v = aa + diag[i] ; 755 vi = aj + diag[i] + 1; 756 nz = ai[i+1] - diag[i] - 1; 757 s1 = tmp[i]; 758 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 759 while (nz--) { 760 tmp[*vi++ ] -= (*v++)*s1; 761 } 762 tmp[i] = s1; 763 } 764 765 /* backward solve the L^T */ 766 for (i=n-1; i>=0; i--){ 767 v = aa + diag[i] - 1 ; 768 vi = aj + diag[i] - 1 ; 769 nz = diag[i] - ai[i]; 770 s1 = tmp[i]; 771 while (nz--) { 772 tmp[*vi-- ] -= (*v--)*s1; 773 } 774 } 775 776 /* copy tmp into x according to permutation */ 777 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 778 779 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 780 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 781 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 782 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 783 784 ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr); 785 PetscFunctionReturn(0); 786 } 787 788 #undef __FUNCT__ 789 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 790 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 791 { 792 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 793 IS iscol = a->col,isrow = a->row; 794 PetscErrorCode ierr; 795 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 796 PetscInt nz,*rout,*cout,*diag = a->diag; 797 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 798 799 PetscFunctionBegin; 800 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 801 802 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 803 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 804 tmp = a->solve_work; 805 806 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 807 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 808 809 /* copy the b into temp work space according to permutation */ 810 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 811 812 /* forward solve the U^T */ 813 for (i=0; i<n; i++) { 814 v = aa + diag[i] ; 815 vi = aj + diag[i] + 1; 816 nz = ai[i+1] - diag[i] - 1; 817 tmp[i] *= *v++; 818 while (nz--) { 819 tmp[*vi++ ] -= (*v++)*tmp[i]; 820 } 821 } 822 823 /* backward solve the L^T */ 824 for (i=n-1; i>=0; i--){ 825 v = aa + diag[i] - 1 ; 826 vi = aj + diag[i] - 1 ; 827 nz = diag[i] - ai[i]; 828 while (nz--) { 829 tmp[*vi-- ] -= (*v--)*tmp[i]; 830 } 831 } 832 833 /* copy tmp into x according to permutation */ 834 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 835 836 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 837 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 838 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 839 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 840 841 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 842 PetscFunctionReturn(0); 843 } 844 /* ----------------------------------------------------------------*/ 845 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 846 847 #undef __FUNCT__ 848 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 849 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 850 { 851 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 852 IS isicol; 853 PetscErrorCode ierr; 854 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 855 PetscInt *bi,*cols,nnz,*cols_lvl; 856 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 857 PetscInt i,levels,diagonal_fill; 858 PetscTruth col_identity,row_identity; 859 PetscReal f; 860 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 861 PetscBT lnkbt; 862 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 863 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 864 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 865 866 PetscFunctionBegin; 867 f = info->fill; 868 levels = (PetscInt)info->levels; 869 diagonal_fill = (PetscInt)info->diagonal_fill; 870 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 871 872 /* special case that simply copies fill pattern */ 873 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 874 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 875 if (!levels && row_identity && col_identity) { 876 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 877 (*fact)->factor = FACTOR_LU; 878 b = (Mat_SeqAIJ*)(*fact)->data; 879 if (!b->diag) { 880 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 881 } 882 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 883 b->row = isrow; 884 b->col = iscol; 885 b->icol = isicol; 886 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 887 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 888 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 889 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 893 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 894 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 895 896 /* get new row pointers */ 897 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 898 bi[0] = 0; 899 /* bdiag is location of diagonal in factor */ 900 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 901 bdiag[0] = 0; 902 903 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 904 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 905 906 /* create a linked list for storing column indices of the active row */ 907 nlnk = n + 1; 908 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 909 910 /* initial FreeSpace size is f*(ai[n]+1) */ 911 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 912 current_space = free_space; 913 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 914 current_space_lvl = free_space_lvl; 915 916 for (i=0; i<n; i++) { 917 nzi = 0; 918 /* copy current row into linked list */ 919 nnz = ai[r[i]+1] - ai[r[i]]; 920 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 921 cols = aj + ai[r[i]]; 922 lnk[i] = -1; /* marker to indicate if diagonal exists */ 923 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 924 nzi += nlnk; 925 926 /* make sure diagonal entry is included */ 927 if (diagonal_fill && lnk[i] == -1) { 928 fm = n; 929 while (lnk[fm] < i) fm = lnk[fm]; 930 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 931 lnk[fm] = i; 932 lnk_lvl[i] = 0; 933 nzi++; dcount++; 934 } 935 936 /* add pivot rows into the active row */ 937 nzbd = 0; 938 prow = lnk[n]; 939 while (prow < i) { 940 nnz = bdiag[prow]; 941 cols = bj_ptr[prow] + nnz + 1; 942 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 943 nnz = bi[prow+1] - bi[prow] - nnz - 1; 944 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 945 nzi += nlnk; 946 prow = lnk[prow]; 947 nzbd++; 948 } 949 bdiag[i] = nzbd; 950 bi[i+1] = bi[i] + nzi; 951 952 /* if free space is not available, make more free space */ 953 if (current_space->local_remaining<nzi) { 954 nnz = nzi*(n - i); /* estimated and max additional space needed */ 955 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 956 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 957 reallocs++; 958 } 959 960 /* copy data into free_space and free_space_lvl, then initialize lnk */ 961 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 962 bj_ptr[i] = current_space->array; 963 bjlvl_ptr[i] = current_space_lvl->array; 964 965 /* make sure the active row i has diagonal entry */ 966 if (*(bj_ptr[i]+bdiag[i]) != i) { 967 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 968 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 969 } 970 971 current_space->array += nzi; 972 current_space->local_used += nzi; 973 current_space->local_remaining -= nzi; 974 current_space_lvl->array += nzi; 975 current_space_lvl->local_used += nzi; 976 current_space_lvl->local_remaining -= nzi; 977 } 978 979 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 980 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 981 982 /* destroy list of free space and other temporary arrays */ 983 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 984 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 985 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 986 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 987 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 988 989 #if defined(PETSC_USE_DEBUG) 990 { 991 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 992 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 993 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 994 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr); 995 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 996 if (diagonal_fill) { 997 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr); 998 } 999 } 1000 #endif 1001 1002 /* put together the new matrix */ 1003 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1004 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1005 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1006 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1007 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1008 b = (Mat_SeqAIJ*)(*fact)->data; 1009 b->freedata = PETSC_TRUE; 1010 b->singlemalloc = PETSC_FALSE; 1011 len = (bi[n] )*sizeof(PetscScalar); 1012 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1013 b->j = bj; 1014 b->i = bi; 1015 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1016 b->diag = bdiag; 1017 b->ilen = 0; 1018 b->imax = 0; 1019 b->row = isrow; 1020 b->col = iscol; 1021 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1022 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1023 b->icol = isicol; 1024 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1025 /* In b structure: Free imax, ilen, old a, old j. 1026 Allocate bdiag, solve_work, new a, new j */ 1027 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1028 b->maxnz = b->nz = bi[n] ; 1029 (*fact)->factor = FACTOR_LU; 1030 (*fact)->info.factor_mallocs = reallocs; 1031 (*fact)->info.fill_ratio_given = f; 1032 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1033 1034 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1035 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1036 1037 PetscFunctionReturn(0); 1038 } 1039 1040 #include "src/mat/impls/sbaij/seq/sbaij.h" 1041 #undef __FUNCT__ 1042 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1043 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1044 { 1045 Mat C = *B; 1046 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1047 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1048 IS ip=b->row; 1049 PetscErrorCode ierr; 1050 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1051 PetscInt *ai=a->i,*aj=a->j; 1052 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1053 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1054 PetscReal zeropivot,rs,shiftnz; 1055 PetscReal shiftpd; 1056 ChShift_Ctx sctx; 1057 PetscInt newshift; 1058 1059 PetscFunctionBegin; 1060 shiftnz = info->shiftnz; 1061 shiftpd = info->shiftpd; 1062 zeropivot = info->zeropivot; 1063 1064 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1065 1066 /* initialization */ 1067 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1068 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1069 jl = il + mbs; 1070 rtmp = (MatScalar*)(jl + mbs); 1071 1072 sctx.shift_amount = 0; 1073 sctx.nshift = 0; 1074 do { 1075 sctx.chshift = PETSC_FALSE; 1076 for (i=0; i<mbs; i++) { 1077 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1078 } 1079 1080 for (k = 0; k<mbs; k++){ 1081 bval = ba + bi[k]; 1082 /* initialize k-th row by the perm[k]-th row of A */ 1083 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1084 for (j = jmin; j < jmax; j++){ 1085 col = rip[aj[j]]; 1086 if (col >= k){ /* only take upper triangular entry */ 1087 rtmp[col] = aa[j]; 1088 *bval++ = 0.0; /* for in-place factorization */ 1089 } 1090 } 1091 /* shift the diagonal of the matrix */ 1092 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1093 1094 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1095 dk = rtmp[k]; 1096 i = jl[k]; /* first row to be added to k_th row */ 1097 1098 while (i < k){ 1099 nexti = jl[i]; /* next row to be added to k_th row */ 1100 1101 /* compute multiplier, update diag(k) and U(i,k) */ 1102 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1103 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1104 dk += uikdi*ba[ili]; 1105 ba[ili] = uikdi; /* -U(i,k) */ 1106 1107 /* add multiple of row i to k-th row */ 1108 jmin = ili + 1; jmax = bi[i+1]; 1109 if (jmin < jmax){ 1110 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1111 /* update il and jl for row i */ 1112 il[i] = jmin; 1113 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1114 } 1115 i = nexti; 1116 } 1117 1118 /* shift the diagonals when zero pivot is detected */ 1119 /* compute rs=sum of abs(off-diagonal) */ 1120 rs = 0.0; 1121 jmin = bi[k]+1; 1122 nz = bi[k+1] - jmin; 1123 if (nz){ 1124 bcol = bj + jmin; 1125 while (nz--){ 1126 rs += PetscAbsScalar(rtmp[*bcol]); 1127 bcol++; 1128 } 1129 } 1130 1131 sctx.rs = rs; 1132 sctx.pv = dk; 1133 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1134 if (newshift == 1){ 1135 break; /* sctx.shift_amount is updated */ 1136 } else if (newshift == -1){ 1137 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1138 } 1139 1140 /* copy data into U(k,:) */ 1141 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1142 jmin = bi[k]+1; jmax = bi[k+1]; 1143 if (jmin < jmax) { 1144 for (j=jmin; j<jmax; j++){ 1145 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1146 } 1147 /* add the k-th row into il and jl */ 1148 il[k] = jmin; 1149 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1150 } 1151 } 1152 } while (sctx.chshift); 1153 ierr = PetscFree(il);CHKERRQ(ierr); 1154 1155 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1156 C->factor = FACTOR_CHOLESKY; 1157 C->assembled = PETSC_TRUE; 1158 C->preallocated = PETSC_TRUE; 1159 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1160 if (sctx.nshift){ 1161 if (shiftnz) { 1162 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1163 } else if (shiftpd) { 1164 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1165 } 1166 } 1167 PetscFunctionReturn(0); 1168 } 1169 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1172 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1173 { 1174 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1175 Mat_SeqSBAIJ *b; 1176 Mat B; 1177 PetscErrorCode ierr; 1178 PetscTruth perm_identity; 1179 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1180 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1181 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1182 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1183 PetscReal fill=info->fill,levels=info->levels; 1184 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1185 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1186 PetscBT lnkbt; 1187 1188 PetscFunctionBegin; 1189 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1190 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1191 1192 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1193 ui[0] = 0; 1194 1195 /* special case that simply copies fill pattern */ 1196 if (!levels && perm_identity) { 1197 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1198 for (i=0; i<am; i++) { 1199 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1200 } 1201 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1202 cols = uj; 1203 for (i=0; i<am; i++) { 1204 aj = a->j + a->diag[i]; 1205 ncols = ui[i+1] - ui[i]; 1206 for (j=0; j<ncols; j++) *cols++ = *aj++; 1207 } 1208 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1209 /* initialization */ 1210 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1211 1212 /* jl: linked list for storing indices of the pivot rows 1213 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1214 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1215 il = jl + am; 1216 uj_ptr = (PetscInt**)(il + am); 1217 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1218 for (i=0; i<am; i++){ 1219 jl[i] = am; il[i] = 0; 1220 } 1221 1222 /* create and initialize a linked list for storing column indices of the active row k */ 1223 nlnk = am + 1; 1224 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1225 1226 /* initial FreeSpace size is fill*(ai[am]+1) */ 1227 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1228 current_space = free_space; 1229 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1230 current_space_lvl = free_space_lvl; 1231 1232 for (k=0; k<am; k++){ /* for each active row k */ 1233 /* initialize lnk by the column indices of row rip[k] of A */ 1234 nzk = 0; 1235 ncols = ai[rip[k]+1] - ai[rip[k]]; 1236 ncols_upper = 0; 1237 for (j=0; j<ncols; j++){ 1238 i = *(aj + ai[rip[k]] + j); 1239 if (rip[i] >= k){ /* only take upper triangular entry */ 1240 ajtmp[ncols_upper] = i; 1241 ncols_upper++; 1242 } 1243 } 1244 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1245 nzk += nlnk; 1246 1247 /* update lnk by computing fill-in for each pivot row to be merged in */ 1248 prow = jl[k]; /* 1st pivot row */ 1249 1250 while (prow < k){ 1251 nextprow = jl[prow]; 1252 1253 /* merge prow into k-th row */ 1254 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1255 jmax = ui[prow+1]; 1256 ncols = jmax-jmin; 1257 i = jmin - ui[prow]; 1258 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1259 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1260 j = *(uj - 1); 1261 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1262 nzk += nlnk; 1263 1264 /* update il and jl for prow */ 1265 if (jmin < jmax){ 1266 il[prow] = jmin; 1267 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1268 } 1269 prow = nextprow; 1270 } 1271 1272 /* if free space is not available, make more free space */ 1273 if (current_space->local_remaining<nzk) { 1274 i = am - k + 1; /* num of unfactored rows */ 1275 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1276 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1277 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1278 reallocs++; 1279 } 1280 1281 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1282 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1283 1284 /* add the k-th row into il and jl */ 1285 if (nzk > 1){ 1286 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1287 jl[k] = jl[i]; jl[i] = k; 1288 il[k] = ui[k] + 1; 1289 } 1290 uj_ptr[k] = current_space->array; 1291 uj_lvl_ptr[k] = current_space_lvl->array; 1292 1293 current_space->array += nzk; 1294 current_space->local_used += nzk; 1295 current_space->local_remaining -= nzk; 1296 1297 current_space_lvl->array += nzk; 1298 current_space_lvl->local_used += nzk; 1299 current_space_lvl->local_remaining -= nzk; 1300 1301 ui[k+1] = ui[k] + nzk; 1302 } 1303 1304 #if defined(PETSC_USE_DEBUG) 1305 if (ai[am] != 0) { 1306 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1307 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1308 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1309 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1310 } else { 1311 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1312 } 1313 #endif 1314 1315 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1316 ierr = PetscFree(jl);CHKERRQ(ierr); 1317 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1318 1319 /* destroy list of free space and other temporary array(s) */ 1320 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1321 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1322 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1323 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1324 1325 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1326 1327 /* put together the new matrix in MATSEQSBAIJ format */ 1328 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1329 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1330 B = *fact; 1331 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1332 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1333 1334 b = (Mat_SeqSBAIJ*)B->data; 1335 b->singlemalloc = PETSC_FALSE; 1336 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1337 b->j = uj; 1338 b->i = ui; 1339 b->diag = 0; 1340 b->ilen = 0; 1341 b->imax = 0; 1342 b->row = perm; 1343 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1344 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1345 b->icol = perm; 1346 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1347 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1348 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1349 b->maxnz = b->nz = ui[am]; 1350 1351 B->factor = FACTOR_CHOLESKY; 1352 B->info.factor_mallocs = reallocs; 1353 B->info.fill_ratio_given = fill; 1354 if (ai[am] != 0) { 1355 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1356 } else { 1357 B->info.fill_ratio_needed = 0.0; 1358 } 1359 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1360 if (perm_identity){ 1361 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1362 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1363 } 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1369 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1370 { 1371 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1372 Mat_SeqSBAIJ *b; 1373 Mat B; 1374 PetscErrorCode ierr; 1375 PetscTruth perm_identity; 1376 PetscReal fill = info->fill; 1377 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1378 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1379 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1380 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1381 PetscBT lnkbt; 1382 IS iperm; 1383 1384 PetscFunctionBegin; 1385 /* check whether perm is the identity mapping */ 1386 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1387 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1388 1389 if (!perm_identity){ 1390 /* check if perm is symmetric! */ 1391 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1392 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1393 for (i=0; i<am; i++) { 1394 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1395 } 1396 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1397 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1398 } 1399 1400 /* initialization */ 1401 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1402 ui[0] = 0; 1403 1404 /* jl: linked list for storing indices of the pivot rows 1405 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1406 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1407 il = jl + am; 1408 cols = il + am; 1409 ui_ptr = (PetscInt**)(cols + am); 1410 for (i=0; i<am; i++){ 1411 jl[i] = am; il[i] = 0; 1412 } 1413 1414 /* create and initialize a linked list for storing column indices of the active row k */ 1415 nlnk = am + 1; 1416 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1417 1418 /* initial FreeSpace size is fill*(ai[am]+1) */ 1419 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1420 current_space = free_space; 1421 1422 for (k=0; k<am; k++){ /* for each active row k */ 1423 /* initialize lnk by the column indices of row rip[k] of A */ 1424 nzk = 0; 1425 ncols = ai[rip[k]+1] - ai[rip[k]]; 1426 ncols_upper = 0; 1427 for (j=0; j<ncols; j++){ 1428 i = rip[*(aj + ai[rip[k]] + j)]; 1429 if (i >= k){ /* only take upper triangular entry */ 1430 cols[ncols_upper] = i; 1431 ncols_upper++; 1432 } 1433 } 1434 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1435 nzk += nlnk; 1436 1437 /* update lnk by computing fill-in for each pivot row to be merged in */ 1438 prow = jl[k]; /* 1st pivot row */ 1439 1440 while (prow < k){ 1441 nextprow = jl[prow]; 1442 /* merge prow into k-th row */ 1443 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1444 jmax = ui[prow+1]; 1445 ncols = jmax-jmin; 1446 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1447 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1448 nzk += nlnk; 1449 1450 /* update il and jl for prow */ 1451 if (jmin < jmax){ 1452 il[prow] = jmin; 1453 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1454 } 1455 prow = nextprow; 1456 } 1457 1458 /* if free space is not available, make more free space */ 1459 if (current_space->local_remaining<nzk) { 1460 i = am - k + 1; /* num of unfactored rows */ 1461 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1462 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1463 reallocs++; 1464 } 1465 1466 /* copy data into free space, then initialize lnk */ 1467 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1468 1469 /* add the k-th row into il and jl */ 1470 if (nzk-1 > 0){ 1471 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1472 jl[k] = jl[i]; jl[i] = k; 1473 il[k] = ui[k] + 1; 1474 } 1475 ui_ptr[k] = current_space->array; 1476 current_space->array += nzk; 1477 current_space->local_used += nzk; 1478 current_space->local_remaining -= nzk; 1479 1480 ui[k+1] = ui[k] + nzk; 1481 } 1482 1483 #if defined(PETSC_USE_DEBUG) 1484 if (ai[am] != 0) { 1485 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1486 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1487 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1488 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1489 } else { 1490 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1491 } 1492 #endif 1493 1494 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1495 ierr = PetscFree(jl);CHKERRQ(ierr); 1496 1497 /* destroy list of free space and other temporary array(s) */ 1498 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1499 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1500 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1501 1502 /* put together the new matrix in MATSEQSBAIJ format */ 1503 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1504 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1505 B = *fact; 1506 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1507 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1508 1509 b = (Mat_SeqSBAIJ*)B->data; 1510 b->singlemalloc = PETSC_FALSE; 1511 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1512 b->j = uj; 1513 b->i = ui; 1514 b->diag = 0; 1515 b->ilen = 0; 1516 b->imax = 0; 1517 b->row = perm; 1518 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1519 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1520 b->icol = perm; 1521 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1522 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1523 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1524 b->maxnz = b->nz = ui[am]; 1525 1526 B->factor = FACTOR_CHOLESKY; 1527 B->info.factor_mallocs = reallocs; 1528 B->info.fill_ratio_given = fill; 1529 if (ai[am] != 0) { 1530 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1531 } else { 1532 B->info.fill_ratio_needed = 0.0; 1533 } 1534 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1535 if (perm_identity){ 1536 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1537 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1538 } 1539 PetscFunctionReturn(0); 1540 } 1541