1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 23 24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 28 #endif 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 32 /* ------------------------------------------------------------ 33 34 This interface was contribed by Tony Caola 35 36 This routine is an interface to the pivoting drop-tolerance 37 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 38 SPARSEKIT2. 39 40 The SPARSEKIT2 routines used here are covered by the GNU 41 copyright; see the file gnu in this directory. 42 43 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 44 help in getting this routine ironed out. 45 46 The major drawback to this routine is that if info->fill is 47 not large enough it fails rather than allocating more space; 48 this can be fixed by hacking/improving the f2c version of 49 Yousef Saad's code. 50 51 ------------------------------------------------------------ 52 */ 53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 54 { 55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 56 PetscFunctionBegin; 57 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 58 You can obtain the drop tolerance routines by installing PETSc from\n\ 59 www.mcs.anl.gov/petsc\n"); 60 #else 61 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 62 IS iscolf,isicol,isirow; 63 PetscTruth reorder; 64 PetscErrorCode ierr,sierr; 65 PetscInt *c,*r,*ic,i,n = A->m; 66 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 67 PetscInt *ordcol,*iwk,*iperm,*jw; 68 PetscInt jmax,lfill,job,*o_i,*o_j; 69 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 70 PetscReal af; 71 72 PetscFunctionBegin; 73 74 if (info->dt == PETSC_DEFAULT) info->dt = .005; 75 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 76 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 77 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 78 lfill = (PetscInt)(info->dtcount/2.0); 79 jmax = (PetscInt)(info->fill*a->nz); 80 81 82 /* ------------------------------------------------------------ 83 If reorder=.TRUE., then the original matrix has to be 84 reordered to reflect the user selected ordering scheme, and 85 then de-reordered so it is in it's original format. 86 Because Saad's dperm() is NOT in place, we have to copy 87 the original matrix and allocate more storage. . . 88 ------------------------------------------------------------ 89 */ 90 91 /* set reorder to true if either isrow or iscol is not identity */ 92 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 93 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 94 reorder = PetscNot(reorder); 95 96 97 /* storage for ilu factor */ 98 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 100 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 101 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 102 103 /* ------------------------------------------------------------ 104 Make sure that everything is Fortran formatted (1-Based) 105 ------------------------------------------------------------ 106 */ 107 for (i=old_i[0];i<old_i[n];i++) { 108 old_j[i]++; 109 } 110 for(i=0;i<n+1;i++) { 111 old_i[i]++; 112 }; 113 114 115 if (reorder) { 116 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 117 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 r[i] = r[i]+1; 120 c[i] = c[i]+1; 121 } 122 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 123 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 124 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 125 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 126 for (i=0;i<n;i++) { 127 r[i] = r[i]-1; 128 c[i] = c[i]-1; 129 } 130 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 131 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 132 o_a = old_a2; 133 o_j = old_j2; 134 o_i = old_i2; 135 } else { 136 o_a = old_a; 137 o_j = old_j; 138 o_i = old_i; 139 } 140 141 /* ------------------------------------------------------------ 142 Call Saad's ilutp() routine to generate the factorization 143 ------------------------------------------------------------ 144 */ 145 146 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 147 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 148 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 149 150 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 151 if (sierr) { 152 switch (sierr) { 153 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 154 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 155 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 156 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 157 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 158 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 159 } 160 } 161 162 ierr = PetscFree(w);CHKERRQ(ierr); 163 ierr = PetscFree(jw);CHKERRQ(ierr); 164 165 /* ------------------------------------------------------------ 166 Saad's routine gives the result in Modified Sparse Row (msr) 167 Convert to Compressed Sparse Row format (csr) 168 ------------------------------------------------------------ 169 */ 170 171 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 172 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 173 174 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 175 176 ierr = PetscFree(iwk);CHKERRQ(ierr); 177 ierr = PetscFree(wk);CHKERRQ(ierr); 178 179 if (reorder) { 180 ierr = PetscFree(old_a2);CHKERRQ(ierr); 181 ierr = PetscFree(old_j2);CHKERRQ(ierr); 182 ierr = PetscFree(old_i2);CHKERRQ(ierr); 183 } else { 184 /* fix permutation of old_j that the factorization introduced */ 185 for (i=old_i[0]; i<old_i[n]; i++) { 186 old_j[i-1] = iperm[old_j[i-1]-1]; 187 } 188 } 189 190 /* get rid of the shift to indices starting at 1 */ 191 for (i=0; i<n+1; i++) { 192 old_i[i]--; 193 } 194 for (i=old_i[0];i<old_i[n];i++) { 195 old_j[i]--; 196 } 197 198 /* Make the factored matrix 0-based */ 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 206 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 207 /*-- permute the right-hand-side and solution vectors --*/ 208 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 209 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 210 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 211 for(i=0; i<n; i++) { 212 ordcol[i] = ic[iperm[i]-1]; 213 }; 214 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 215 ierr = ISDestroy(isicol);CHKERRQ(ierr); 216 217 ierr = PetscFree(iperm);CHKERRQ(ierr); 218 219 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 220 ierr = PetscFree(ordcol);CHKERRQ(ierr); 221 222 /*----- put together the new matrix -----*/ 223 224 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 225 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 226 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 227 (*fact)->factor = FACTOR_LU; 228 (*fact)->assembled = PETSC_TRUE; 229 230 b = (Mat_SeqAIJ*)(*fact)->data; 231 b->freedata = PETSC_TRUE; 232 b->sorted = PETSC_FALSE; 233 b->singlemalloc = PETSC_FALSE; 234 b->a = new_a; 235 b->j = new_j; 236 b->i = new_i; 237 b->ilen = 0; 238 b->imax = 0; 239 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 240 b->row = isirow; 241 b->col = iscolf; 242 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 243 b->maxnz = b->nz = new_i[n]; 244 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 245 (*fact)->info.factor_mallocs = 0; 246 247 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 248 249 af = ((double)b->nz)/((double)a->nz) + .001; 250 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr); 251 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 252 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 253 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 254 255 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 #undef __FUNCT__ 262 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 263 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 264 { 265 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 266 IS isicol; 267 PetscErrorCode ierr; 268 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 269 PetscInt *bi,*bj,*ajtmp; 270 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 271 PetscReal f; 272 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 273 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 274 PetscBT lnkbt; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 284 bi[0] = 0; 285 286 /* bdiag is location of diagonal in factor */ 287 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 288 bdiag[0] = 0; 289 290 /* linked list for storing column indices of the active row */ 291 nlnk = n + 1; 292 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 293 294 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 295 im = cols + n; 296 bi_ptr = (PetscInt**)(im + n); 297 298 /* initial FreeSpace size is f*(ai[n]+1) */ 299 f = info->fill; 300 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 301 current_space = free_space; 302 303 for (i=0; i<n; i++) { 304 /* copy previous fill into linked list */ 305 nzi = 0; 306 nnz = ai[r[i]+1] - ai[r[i]]; 307 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 308 ajtmp = aj + ai[r[i]]; 309 for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */ 310 ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 311 nzi += nlnk; 312 313 /* add pivot rows into linked list */ 314 row = lnk[n]; 315 while (row < i) { 316 nzbd = bdiag[row] - bi[row] + 1; 317 ajtmp = bi_ptr[row] + nzbd; 318 nnz = im[row] - nzbd; /* num of columns with row<indices<=i */ 319 im[row] = nzbd; 320 ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr); 321 nzi += nlnk; 322 im[row] += nzbd; /* update im[row]: num of cols with index<=i */ 323 324 row = lnk[row]; 325 } 326 327 bi[i+1] = bi[i] + nzi; 328 im[i] = nzi; 329 330 /* mark bdiag */ 331 nzbd = 0; 332 nnz = nzi; 333 k = lnk[n]; 334 while (nnz-- && k < i){ 335 nzbd++; 336 k = lnk[k]; 337 } 338 bdiag[i] = bi[i] + nzbd; 339 340 /* if free space is not available, make more free space */ 341 if (current_space->local_remaining<nzi) { 342 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 343 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 344 reallocs++; 345 } 346 347 /* copy data into free space, then initialize lnk */ 348 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 349 bi_ptr[i] = current_space->array; 350 current_space->array += nzi; 351 current_space->local_used += nzi; 352 current_space->local_remaining -= nzi; 353 } 354 #if defined(PETSC_USE_DEBUG) 355 if (ai[n] != 0) { 356 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 357 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 358 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %G or use \n",af));CHKERRQ(ierr); 359 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 360 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 361 } else { 362 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr); 363 } 364 #endif 365 366 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 367 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 368 369 /* destroy list of free space and other temporary array(s) */ 370 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 371 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 372 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 373 ierr = PetscFree(cols);CHKERRQ(ierr); 374 375 /* put together the new matrix */ 376 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 377 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 378 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 379 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 380 b = (Mat_SeqAIJ*)(*B)->data; 381 b->freedata = PETSC_TRUE; 382 b->singlemalloc = PETSC_FALSE; 383 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 384 b->j = bj; 385 b->i = bi; 386 b->diag = bdiag; 387 b->ilen = 0; 388 b->imax = 0; 389 b->row = isrow; 390 b->col = iscol; 391 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 392 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 393 b->icol = isicol; 394 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 395 396 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 397 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 398 b->maxnz = b->nz = bi[n] ; 399 400 (*B)->factor = FACTOR_LU; 401 (*B)->info.factor_mallocs = reallocs; 402 (*B)->info.fill_ratio_given = f; 403 404 if (ai[n] != 0) { 405 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 406 } else { 407 (*B)->info.fill_ratio_needed = 0.0; 408 } 409 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 410 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 411 PetscFunctionReturn(0); 412 } 413 414 /* ----------------------------------------------------------- */ 415 #undef __FUNCT__ 416 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 417 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 418 { 419 Mat C=*B; 420 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 421 IS isrow = b->row,isicol = b->icol; 422 PetscErrorCode ierr; 423 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 424 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 425 PetscInt *diag_offset = b->diag,diag,*pj; 426 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 427 PetscScalar d; 428 PetscReal rs; 429 LUShift_Ctx sctx; 430 PetscInt newshift; 431 432 PetscFunctionBegin; 433 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 434 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 435 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 436 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 437 rtmps = rtmp; ics = ic; 438 439 if (!a->diag) { 440 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 441 } 442 /* if both shift schemes are chosen by user, only use info->shiftpd */ 443 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 444 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 445 PetscInt *aai = a->i,*ddiag = a->diag; 446 sctx.shift_top = 0; 447 for (i=0; i<n; i++) { 448 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 449 d = (a->a)[ddiag[i]]; 450 rs = -PetscAbsScalar(d) - PetscRealPart(d); 451 v = a->a+aai[i]; 452 nz = aai[i+1] - aai[i]; 453 for (j=0; j<nz; j++) 454 rs += PetscAbsScalar(v[j]); 455 if (rs>sctx.shift_top) sctx.shift_top = rs; 456 } 457 if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12; 458 sctx.shift_top *= 1.1; 459 sctx.nshift_max = 5; 460 sctx.shift_lo = 0.; 461 sctx.shift_hi = 1.; 462 } 463 464 sctx.shift_amount = 0; 465 sctx.nshift = 0; 466 do { 467 sctx.lushift = PETSC_FALSE; 468 for (i=0; i<n; i++){ 469 nz = bi[i+1] - bi[i]; 470 bjtmp = bj + bi[i]; 471 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 472 473 /* load in initial (unfactored row) */ 474 nz = a->i[r[i]+1] - a->i[r[i]]; 475 ajtmp = a->j + a->i[r[i]]; 476 v = a->a + a->i[r[i]]; 477 for (j=0; j<nz; j++) { 478 rtmp[ics[ajtmp[j]]] = v[j]; 479 } 480 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 481 482 row = *bjtmp++; 483 while (row < i) { 484 pc = rtmp + row; 485 if (*pc != 0.0) { 486 pv = b->a + diag_offset[row]; 487 pj = b->j + diag_offset[row] + 1; 488 multiplier = *pc / *pv++; 489 *pc = multiplier; 490 nz = bi[row+1] - diag_offset[row] - 1; 491 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 492 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 493 } 494 row = *bjtmp++; 495 } 496 /* finished row so stick it into b->a */ 497 pv = b->a + bi[i] ; 498 pj = b->j + bi[i] ; 499 nz = bi[i+1] - bi[i]; 500 diag = diag_offset[i] - bi[i]; 501 rs = 0.0; 502 for (j=0; j<nz; j++) { 503 pv[j] = rtmps[pj[j]]; 504 if (j != diag) rs += PetscAbsScalar(pv[j]); 505 } 506 507 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 508 sctx.rs = rs; 509 sctx.pv = pv[diag]; 510 ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 511 if (newshift == 1){ 512 break; /* sctx.shift_amount is updated */ 513 } else if (newshift == -1){ 514 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 515 } 516 } 517 518 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 519 /* 520 * if no shift in this attempt & shifting & started shifting & can refine, 521 * then try lower shift 522 */ 523 sctx.shift_hi = info->shift_fraction; 524 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 525 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 526 sctx.lushift = PETSC_TRUE; 527 sctx.nshift++; 528 } 529 } while (sctx.lushift); 530 531 /* invert diagonal entries for simplier triangular solves */ 532 for (i=0; i<n; i++) { 533 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 534 } 535 536 ierr = PetscFree(rtmp);CHKERRQ(ierr); 537 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 538 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 539 C->factor = FACTOR_LU; 540 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 541 C->assembled = PETSC_TRUE; 542 ierr = PetscLogFlops(C->n);CHKERRQ(ierr); 543 if (sctx.nshift){ 544 if (info->shiftnz) { 545 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 546 } else if (info->shiftpd) { 547 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr); 548 } 549 } 550 PetscFunctionReturn(0); 551 } 552 553 #undef __FUNCT__ 554 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 555 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 556 { 557 PetscFunctionBegin; 558 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 559 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 560 PetscFunctionReturn(0); 561 } 562 563 564 /* ----------------------------------------------------------- */ 565 #undef __FUNCT__ 566 #define __FUNCT__ "MatLUFactor_SeqAIJ" 567 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 568 { 569 PetscErrorCode ierr; 570 Mat C; 571 572 PetscFunctionBegin; 573 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 574 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 575 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 576 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 577 PetscFunctionReturn(0); 578 } 579 /* ----------------------------------------------------------- */ 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatSolve_SeqAIJ" 582 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 583 { 584 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 585 IS iscol = a->col,isrow = a->row; 586 PetscErrorCode ierr; 587 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 588 PetscInt nz,*rout,*cout; 589 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 590 591 PetscFunctionBegin; 592 if (!n) PetscFunctionReturn(0); 593 594 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 595 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 596 tmp = a->solve_work; 597 598 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 599 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 600 601 /* forward solve the lower triangular */ 602 tmp[0] = b[*r++]; 603 tmps = tmp; 604 for (i=1; i<n; i++) { 605 v = aa + ai[i] ; 606 vi = aj + ai[i] ; 607 nz = a->diag[i] - ai[i]; 608 sum = b[*r++]; 609 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 610 tmp[i] = sum; 611 } 612 613 /* backward solve the upper triangular */ 614 for (i=n-1; i>=0; i--){ 615 v = aa + a->diag[i] + 1; 616 vi = aj + a->diag[i] + 1; 617 nz = ai[i+1] - a->diag[i] - 1; 618 sum = tmp[i]; 619 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 620 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 621 } 622 623 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 624 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 625 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 626 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 627 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 /* ----------------------------------------------------------- */ 632 #undef __FUNCT__ 633 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 634 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 635 { 636 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 637 PetscErrorCode ierr; 638 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 639 PetscScalar *x,*b,*aa = a->a; 640 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 641 PetscInt adiag_i,i,*vi,nz,ai_i; 642 PetscScalar *v,sum; 643 #endif 644 645 PetscFunctionBegin; 646 if (!n) PetscFunctionReturn(0); 647 648 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 649 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 650 651 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 652 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 653 #else 654 /* forward solve the lower triangular */ 655 x[0] = b[0]; 656 for (i=1; i<n; i++) { 657 ai_i = ai[i]; 658 v = aa + ai_i; 659 vi = aj + ai_i; 660 nz = adiag[i] - ai_i; 661 sum = b[i]; 662 while (nz--) sum -= *v++ * x[*vi++]; 663 x[i] = sum; 664 } 665 666 /* backward solve the upper triangular */ 667 for (i=n-1; i>=0; i--){ 668 adiag_i = adiag[i]; 669 v = aa + adiag_i + 1; 670 vi = aj + adiag_i + 1; 671 nz = ai[i+1] - adiag_i - 1; 672 sum = x[i]; 673 while (nz--) sum -= *v++ * x[*vi++]; 674 x[i] = sum*aa[adiag_i]; 675 } 676 #endif 677 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 678 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 679 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 680 PetscFunctionReturn(0); 681 } 682 683 #undef __FUNCT__ 684 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 685 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 686 { 687 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 688 IS iscol = a->col,isrow = a->row; 689 PetscErrorCode ierr; 690 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 691 PetscInt nz,*rout,*cout; 692 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 693 694 PetscFunctionBegin; 695 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 696 697 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 698 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 699 tmp = a->solve_work; 700 701 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 702 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 703 704 /* forward solve the lower triangular */ 705 tmp[0] = b[*r++]; 706 for (i=1; i<n; i++) { 707 v = aa + ai[i] ; 708 vi = aj + ai[i] ; 709 nz = a->diag[i] - ai[i]; 710 sum = b[*r++]; 711 while (nz--) sum -= *v++ * tmp[*vi++ ]; 712 tmp[i] = sum; 713 } 714 715 /* backward solve the upper triangular */ 716 for (i=n-1; i>=0; i--){ 717 v = aa + a->diag[i] + 1; 718 vi = aj + a->diag[i] + 1; 719 nz = ai[i+1] - a->diag[i] - 1; 720 sum = tmp[i]; 721 while (nz--) sum -= *v++ * tmp[*vi++ ]; 722 tmp[i] = sum*aa[a->diag[i]]; 723 x[*c--] += tmp[i]; 724 } 725 726 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 727 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 728 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 729 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 730 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 731 732 PetscFunctionReturn(0); 733 } 734 /* -------------------------------------------------------------------*/ 735 #undef __FUNCT__ 736 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 737 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 738 { 739 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 740 IS iscol = a->col,isrow = a->row; 741 PetscErrorCode ierr; 742 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 743 PetscInt nz,*rout,*cout,*diag = a->diag; 744 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 745 746 PetscFunctionBegin; 747 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 748 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 749 tmp = a->solve_work; 750 751 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 752 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 753 754 /* copy the b into temp work space according to permutation */ 755 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 756 757 /* forward solve the U^T */ 758 for (i=0; i<n; i++) { 759 v = aa + diag[i] ; 760 vi = aj + diag[i] + 1; 761 nz = ai[i+1] - diag[i] - 1; 762 s1 = tmp[i]; 763 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 764 while (nz--) { 765 tmp[*vi++ ] -= (*v++)*s1; 766 } 767 tmp[i] = s1; 768 } 769 770 /* backward solve the L^T */ 771 for (i=n-1; i>=0; i--){ 772 v = aa + diag[i] - 1 ; 773 vi = aj + diag[i] - 1 ; 774 nz = diag[i] - ai[i]; 775 s1 = tmp[i]; 776 while (nz--) { 777 tmp[*vi-- ] -= (*v--)*s1; 778 } 779 } 780 781 /* copy tmp into x according to permutation */ 782 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 783 784 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 785 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 786 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 787 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 788 789 ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr); 790 PetscFunctionReturn(0); 791 } 792 793 #undef __FUNCT__ 794 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 795 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 796 { 797 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 798 IS iscol = a->col,isrow = a->row; 799 PetscErrorCode ierr; 800 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 801 PetscInt nz,*rout,*cout,*diag = a->diag; 802 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 803 804 PetscFunctionBegin; 805 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 806 807 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 808 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 809 tmp = a->solve_work; 810 811 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 812 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 813 814 /* copy the b into temp work space according to permutation */ 815 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 816 817 /* forward solve the U^T */ 818 for (i=0; i<n; i++) { 819 v = aa + diag[i] ; 820 vi = aj + diag[i] + 1; 821 nz = ai[i+1] - diag[i] - 1; 822 tmp[i] *= *v++; 823 while (nz--) { 824 tmp[*vi++ ] -= (*v++)*tmp[i]; 825 } 826 } 827 828 /* backward solve the L^T */ 829 for (i=n-1; i>=0; i--){ 830 v = aa + diag[i] - 1 ; 831 vi = aj + diag[i] - 1 ; 832 nz = diag[i] - ai[i]; 833 while (nz--) { 834 tmp[*vi-- ] -= (*v--)*tmp[i]; 835 } 836 } 837 838 /* copy tmp into x according to permutation */ 839 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 840 841 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 842 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 843 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 844 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 845 846 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 847 PetscFunctionReturn(0); 848 } 849 /* ----------------------------------------------------------------*/ 850 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 851 852 #undef __FUNCT__ 853 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 854 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 855 { 856 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 857 IS isicol; 858 PetscErrorCode ierr; 859 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 860 PetscInt *bi,*cols,nnz,*cols_lvl; 861 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 862 PetscInt i,levels,diagonal_fill; 863 PetscTruth col_identity,row_identity; 864 PetscReal f; 865 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 866 PetscBT lnkbt; 867 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 868 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 869 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 870 871 PetscFunctionBegin; 872 f = info->fill; 873 levels = (PetscInt)info->levels; 874 diagonal_fill = (PetscInt)info->diagonal_fill; 875 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 876 877 /* special case that simply copies fill pattern */ 878 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 879 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 880 if (!levels && row_identity && col_identity) { 881 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 882 (*fact)->factor = FACTOR_LU; 883 b = (Mat_SeqAIJ*)(*fact)->data; 884 if (!b->diag) { 885 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 886 } 887 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 888 b->row = isrow; 889 b->col = iscol; 890 b->icol = isicol; 891 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 892 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 893 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 894 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 895 PetscFunctionReturn(0); 896 } 897 898 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 899 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 900 901 /* get new row pointers */ 902 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 903 bi[0] = 0; 904 /* bdiag is location of diagonal in factor */ 905 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 906 bdiag[0] = 0; 907 908 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 909 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 910 911 /* create a linked list for storing column indices of the active row */ 912 nlnk = n + 1; 913 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 914 915 /* initial FreeSpace size is f*(ai[n]+1) */ 916 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 917 current_space = free_space; 918 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 919 current_space_lvl = free_space_lvl; 920 921 for (i=0; i<n; i++) { 922 nzi = 0; 923 /* copy current row into linked list */ 924 nnz = ai[r[i]+1] - ai[r[i]]; 925 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 926 cols = aj + ai[r[i]]; 927 lnk[i] = -1; /* marker to indicate if diagonal exists */ 928 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 929 nzi += nlnk; 930 931 /* make sure diagonal entry is included */ 932 if (diagonal_fill && lnk[i] == -1) { 933 fm = n; 934 while (lnk[fm] < i) fm = lnk[fm]; 935 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 936 lnk[fm] = i; 937 lnk_lvl[i] = 0; 938 nzi++; dcount++; 939 } 940 941 /* add pivot rows into the active row */ 942 nzbd = 0; 943 prow = lnk[n]; 944 while (prow < i) { 945 nnz = bdiag[prow]; 946 cols = bj_ptr[prow] + nnz + 1; 947 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 948 nnz = bi[prow+1] - bi[prow] - nnz - 1; 949 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 950 nzi += nlnk; 951 prow = lnk[prow]; 952 nzbd++; 953 } 954 bdiag[i] = nzbd; 955 bi[i+1] = bi[i] + nzi; 956 957 /* if free space is not available, make more free space */ 958 if (current_space->local_remaining<nzi) { 959 nnz = nzi*(n - i); /* estimated and max additional space needed */ 960 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 961 ierr = GetMoreSpace(nnz,¤t_space_lvl);CHKERRQ(ierr); 962 reallocs++; 963 } 964 965 /* copy data into free_space and free_space_lvl, then initialize lnk */ 966 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 967 bj_ptr[i] = current_space->array; 968 bjlvl_ptr[i] = current_space_lvl->array; 969 970 /* make sure the active row i has diagonal entry */ 971 if (*(bj_ptr[i]+bdiag[i]) != i) { 972 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 973 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 974 } 975 976 current_space->array += nzi; 977 current_space->local_used += nzi; 978 current_space->local_remaining -= nzi; 979 current_space_lvl->array += nzi; 980 current_space_lvl->local_used += nzi; 981 current_space_lvl->local_remaining -= nzi; 982 } 983 984 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 985 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 986 987 /* destroy list of free space and other temporary arrays */ 988 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 989 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 990 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 991 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 992 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 993 994 #if defined(PETSC_USE_DEBUG) 995 { 996 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 997 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 998 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 999 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr); 1000 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 1001 if (diagonal_fill) { 1002 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr); 1003 } 1004 } 1005 #endif 1006 1007 /* put together the new matrix */ 1008 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1009 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1010 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1011 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1012 b = (Mat_SeqAIJ*)(*fact)->data; 1013 b->freedata = PETSC_TRUE; 1014 b->singlemalloc = PETSC_FALSE; 1015 len = (bi[n] )*sizeof(PetscScalar); 1016 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1017 b->j = bj; 1018 b->i = bi; 1019 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1020 b->diag = bdiag; 1021 b->ilen = 0; 1022 b->imax = 0; 1023 b->row = isrow; 1024 b->col = iscol; 1025 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1026 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1027 b->icol = isicol; 1028 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1029 /* In b structure: Free imax, ilen, old a, old j. 1030 Allocate bdiag, solve_work, new a, new j */ 1031 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1032 b->maxnz = b->nz = bi[n] ; 1033 (*fact)->factor = FACTOR_LU; 1034 (*fact)->info.factor_mallocs = reallocs; 1035 (*fact)->info.fill_ratio_given = f; 1036 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1037 1038 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1039 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1040 1041 PetscFunctionReturn(0); 1042 } 1043 1044 #include "src/mat/impls/sbaij/seq/sbaij.h" 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1047 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1048 { 1049 Mat C = *B; 1050 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1051 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1052 IS ip=b->row; 1053 PetscErrorCode ierr; 1054 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1055 PetscInt *ai=a->i,*aj=a->j; 1056 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1057 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1058 PetscReal zeropivot,rs,shiftnz; 1059 PetscTruth shiftpd; 1060 ChShift_Ctx sctx; 1061 PetscInt newshift; 1062 1063 PetscFunctionBegin; 1064 shiftnz = info->shiftnz; 1065 shiftpd = info->shiftpd; 1066 zeropivot = info->zeropivot; 1067 1068 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1069 1070 /* initialization */ 1071 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1072 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1073 jl = il + mbs; 1074 rtmp = (MatScalar*)(jl + mbs); 1075 1076 sctx.shift_amount = 0; 1077 sctx.nshift = 0; 1078 do { 1079 sctx.chshift = PETSC_FALSE; 1080 for (i=0; i<mbs; i++) { 1081 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1082 } 1083 1084 for (k = 0; k<mbs; k++){ 1085 bval = ba + bi[k]; 1086 /* initialize k-th row by the perm[k]-th row of A */ 1087 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1088 for (j = jmin; j < jmax; j++){ 1089 col = rip[aj[j]]; 1090 if (col >= k){ /* only take upper triangular entry */ 1091 rtmp[col] = aa[j]; 1092 *bval++ = 0.0; /* for in-place factorization */ 1093 } 1094 } 1095 /* shift the diagonal of the matrix */ 1096 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1097 1098 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1099 dk = rtmp[k]; 1100 i = jl[k]; /* first row to be added to k_th row */ 1101 1102 while (i < k){ 1103 nexti = jl[i]; /* next row to be added to k_th row */ 1104 1105 /* compute multiplier, update diag(k) and U(i,k) */ 1106 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1107 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1108 dk += uikdi*ba[ili]; 1109 ba[ili] = uikdi; /* -U(i,k) */ 1110 1111 /* add multiple of row i to k-th row */ 1112 jmin = ili + 1; jmax = bi[i+1]; 1113 if (jmin < jmax){ 1114 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1115 /* update il and jl for row i */ 1116 il[i] = jmin; 1117 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1118 } 1119 i = nexti; 1120 } 1121 1122 /* shift the diagonals when zero pivot is detected */ 1123 /* compute rs=sum of abs(off-diagonal) */ 1124 rs = 0.0; 1125 jmin = bi[k]+1; 1126 nz = bi[k+1] - jmin; 1127 if (nz){ 1128 bcol = bj + jmin; 1129 while (nz--){ 1130 rs += PetscAbsScalar(rtmp[*bcol]); 1131 bcol++; 1132 } 1133 } 1134 1135 sctx.rs = rs; 1136 sctx.pv = dk; 1137 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1138 if (newshift == 1){ 1139 break; /* sctx.shift_amount is updated */ 1140 } else if (newshift == -1){ 1141 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1142 } 1143 1144 /* copy data into U(k,:) */ 1145 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1146 jmin = bi[k]+1; jmax = bi[k+1]; 1147 if (jmin < jmax) { 1148 for (j=jmin; j<jmax; j++){ 1149 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1150 } 1151 /* add the k-th row into il and jl */ 1152 il[k] = jmin; 1153 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1154 } 1155 } 1156 } while (sctx.chshift); 1157 ierr = PetscFree(il);CHKERRQ(ierr); 1158 1159 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1160 C->factor = FACTOR_CHOLESKY; 1161 C->assembled = PETSC_TRUE; 1162 C->preallocated = PETSC_TRUE; 1163 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1164 if (sctx.nshift){ 1165 if (shiftnz) { 1166 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1167 } else if (shiftpd) { 1168 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1169 } 1170 } 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1176 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1177 { 1178 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1179 Mat_SeqSBAIJ *b; 1180 Mat B; 1181 PetscErrorCode ierr; 1182 PetscTruth perm_identity; 1183 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1184 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1185 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1186 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1187 PetscReal fill=info->fill,levels=info->levels; 1188 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1189 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1190 PetscBT lnkbt; 1191 1192 PetscFunctionBegin; 1193 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1194 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1195 1196 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1197 ui[0] = 0; 1198 1199 /* special case that simply copies fill pattern */ 1200 if (!levels && perm_identity) { 1201 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1202 for (i=0; i<am; i++) { 1203 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1204 } 1205 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1206 cols = uj; 1207 for (i=0; i<am; i++) { 1208 aj = a->j + a->diag[i]; 1209 ncols = ui[i+1] - ui[i]; 1210 for (j=0; j<ncols; j++) *cols++ = *aj++; 1211 } 1212 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1213 /* initialization */ 1214 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1215 1216 /* jl: linked list for storing indices of the pivot rows 1217 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1218 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1219 il = jl + am; 1220 uj_ptr = (PetscInt**)(il + am); 1221 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1222 for (i=0; i<am; i++){ 1223 jl[i] = am; il[i] = 0; 1224 } 1225 1226 /* create and initialize a linked list for storing column indices of the active row k */ 1227 nlnk = am + 1; 1228 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1229 1230 /* initial FreeSpace size is fill*(ai[am]+1) */ 1231 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1232 current_space = free_space; 1233 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1234 current_space_lvl = free_space_lvl; 1235 1236 for (k=0; k<am; k++){ /* for each active row k */ 1237 /* initialize lnk by the column indices of row rip[k] of A */ 1238 nzk = 0; 1239 ncols = ai[rip[k]+1] - ai[rip[k]]; 1240 ncols_upper = 0; 1241 for (j=0; j<ncols; j++){ 1242 i = *(aj + ai[rip[k]] + j); 1243 if (rip[i] >= k){ /* only take upper triangular entry */ 1244 ajtmp[ncols_upper] = i; 1245 ncols_upper++; 1246 } 1247 } 1248 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1249 nzk += nlnk; 1250 1251 /* update lnk by computing fill-in for each pivot row to be merged in */ 1252 prow = jl[k]; /* 1st pivot row */ 1253 1254 while (prow < k){ 1255 nextprow = jl[prow]; 1256 1257 /* merge prow into k-th row */ 1258 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1259 jmax = ui[prow+1]; 1260 ncols = jmax-jmin; 1261 i = jmin - ui[prow]; 1262 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1263 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1264 j = *(uj - 1); 1265 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1266 nzk += nlnk; 1267 1268 /* update il and jl for prow */ 1269 if (jmin < jmax){ 1270 il[prow] = jmin; 1271 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1272 } 1273 prow = nextprow; 1274 } 1275 1276 /* if free space is not available, make more free space */ 1277 if (current_space->local_remaining<nzk) { 1278 i = am - k + 1; /* num of unfactored rows */ 1279 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1280 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1281 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1282 reallocs++; 1283 } 1284 1285 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1286 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1287 1288 /* add the k-th row into il and jl */ 1289 if (nzk > 1){ 1290 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1291 jl[k] = jl[i]; jl[i] = k; 1292 il[k] = ui[k] + 1; 1293 } 1294 uj_ptr[k] = current_space->array; 1295 uj_lvl_ptr[k] = current_space_lvl->array; 1296 1297 current_space->array += nzk; 1298 current_space->local_used += nzk; 1299 current_space->local_remaining -= nzk; 1300 1301 current_space_lvl->array += nzk; 1302 current_space_lvl->local_used += nzk; 1303 current_space_lvl->local_remaining -= nzk; 1304 1305 ui[k+1] = ui[k] + nzk; 1306 } 1307 1308 #if defined(PETSC_USE_DEBUG) 1309 if (ai[am] != 0) { 1310 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1311 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1312 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1313 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1314 } else { 1315 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1316 } 1317 #endif 1318 1319 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1320 ierr = PetscFree(jl);CHKERRQ(ierr); 1321 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1322 1323 /* destroy list of free space and other temporary array(s) */ 1324 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1325 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1326 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1327 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1328 1329 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1330 1331 /* put together the new matrix in MATSEQSBAIJ format */ 1332 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1333 B = *fact; 1334 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1335 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1336 1337 b = (Mat_SeqSBAIJ*)B->data; 1338 b->singlemalloc = PETSC_FALSE; 1339 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1340 b->j = uj; 1341 b->i = ui; 1342 b->diag = 0; 1343 b->ilen = 0; 1344 b->imax = 0; 1345 b->row = perm; 1346 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1347 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1348 b->icol = perm; 1349 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1350 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1351 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1352 b->maxnz = b->nz = ui[am]; 1353 1354 B->factor = FACTOR_CHOLESKY; 1355 B->info.factor_mallocs = reallocs; 1356 B->info.fill_ratio_given = fill; 1357 if (ai[am] != 0) { 1358 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1359 } else { 1360 B->info.fill_ratio_needed = 0.0; 1361 } 1362 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1363 if (perm_identity){ 1364 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1365 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1366 } 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1372 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1373 { 1374 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1375 Mat_SeqSBAIJ *b; 1376 Mat B; 1377 PetscErrorCode ierr; 1378 PetscTruth perm_identity; 1379 PetscReal fill = info->fill; 1380 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1381 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1382 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1383 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1384 PetscBT lnkbt; 1385 IS iperm; 1386 1387 PetscFunctionBegin; 1388 /* check whether perm is the identity mapping */ 1389 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1390 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1391 1392 if (!perm_identity){ 1393 /* check if perm is symmetric! */ 1394 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1395 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1396 for (i=0; i<am; i++) { 1397 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1398 } 1399 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1400 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1401 } 1402 1403 /* initialization */ 1404 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1405 ui[0] = 0; 1406 1407 /* jl: linked list for storing indices of the pivot rows 1408 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1409 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1410 il = jl + am; 1411 cols = il + am; 1412 ui_ptr = (PetscInt**)(cols + am); 1413 for (i=0; i<am; i++){ 1414 jl[i] = am; il[i] = 0; 1415 } 1416 1417 /* create and initialize a linked list for storing column indices of the active row k */ 1418 nlnk = am + 1; 1419 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1420 1421 /* initial FreeSpace size is fill*(ai[am]+1) */ 1422 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1423 current_space = free_space; 1424 1425 for (k=0; k<am; k++){ /* for each active row k */ 1426 /* initialize lnk by the column indices of row rip[k] of A */ 1427 nzk = 0; 1428 ncols = ai[rip[k]+1] - ai[rip[k]]; 1429 ncols_upper = 0; 1430 for (j=0; j<ncols; j++){ 1431 i = rip[*(aj + ai[rip[k]] + j)]; 1432 if (i >= k){ /* only take upper triangular entry */ 1433 cols[ncols_upper] = i; 1434 ncols_upper++; 1435 } 1436 } 1437 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1438 nzk += nlnk; 1439 1440 /* update lnk by computing fill-in for each pivot row to be merged in */ 1441 prow = jl[k]; /* 1st pivot row */ 1442 1443 while (prow < k){ 1444 nextprow = jl[prow]; 1445 /* merge prow into k-th row */ 1446 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1447 jmax = ui[prow+1]; 1448 ncols = jmax-jmin; 1449 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1450 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1451 nzk += nlnk; 1452 1453 /* update il and jl for prow */ 1454 if (jmin < jmax){ 1455 il[prow] = jmin; 1456 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1457 } 1458 prow = nextprow; 1459 } 1460 1461 /* if free space is not available, make more free space */ 1462 if (current_space->local_remaining<nzk) { 1463 i = am - k + 1; /* num of unfactored rows */ 1464 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1465 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1466 reallocs++; 1467 } 1468 1469 /* copy data into free space, then initialize lnk */ 1470 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1471 1472 /* add the k-th row into il and jl */ 1473 if (nzk-1 > 0){ 1474 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1475 jl[k] = jl[i]; jl[i] = k; 1476 il[k] = ui[k] + 1; 1477 } 1478 ui_ptr[k] = current_space->array; 1479 current_space->array += nzk; 1480 current_space->local_used += nzk; 1481 current_space->local_remaining -= nzk; 1482 1483 ui[k+1] = ui[k] + nzk; 1484 } 1485 1486 #if defined(PETSC_USE_DEBUG) 1487 if (ai[am] != 0) { 1488 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1489 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1490 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1491 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1492 } else { 1493 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1494 } 1495 #endif 1496 1497 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1498 ierr = PetscFree(jl);CHKERRQ(ierr); 1499 1500 /* destroy list of free space and other temporary array(s) */ 1501 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1502 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1503 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1504 1505 /* put together the new matrix in MATSEQSBAIJ format */ 1506 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1507 B = *fact; 1508 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1509 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1510 1511 b = (Mat_SeqSBAIJ*)B->data; 1512 b->singlemalloc = PETSC_FALSE; 1513 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1514 b->j = uj; 1515 b->i = ui; 1516 b->diag = 0; 1517 b->ilen = 0; 1518 b->imax = 0; 1519 b->row = perm; 1520 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1521 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1522 b->icol = perm; 1523 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1524 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1525 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1526 b->maxnz = b->nz = ui[am]; 1527 1528 B->factor = FACTOR_CHOLESKY; 1529 B->info.factor_mallocs = reallocs; 1530 B->info.fill_ratio_given = fill; 1531 if (ai[am] != 0) { 1532 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1533 } else { 1534 B->info.fill_ratio_needed = 0.0; 1535 } 1536 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1537 if (perm_identity){ 1538 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1539 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1540 } 1541 PetscFunctionReturn(0); 1542 } 1543