1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 23 24 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 25 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 26 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->m; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 223 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 225 (*fact)->factor = FACTOR_LU; 226 (*fact)->assembled = PETSC_TRUE; 227 228 b = (Mat_SeqAIJ*)(*fact)->data; 229 ierr = PetscFree(b->imax);CHKERRQ(ierr); 230 b->freedata = PETSC_TRUE; 231 b->sorted = PETSC_FALSE; 232 b->singlemalloc = PETSC_FALSE; 233 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 234 b->a = new_a; 235 b->j = new_j; 236 b->i = new_i; 237 b->ilen = 0; 238 b->imax = 0; 239 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 240 b->row = isirow; 241 b->col = iscolf; 242 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 243 b->maxnz = b->nz = new_i[n]; 244 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 245 (*fact)->info.factor_mallocs = 0; 246 247 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 248 249 af = ((double)b->nz)/((double)a->nz) + .001; 250 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr); 251 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 252 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 253 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 254 255 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 #undef __FUNCT__ 262 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 263 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 264 { 265 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 266 IS isicol; 267 PetscErrorCode ierr; 268 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 269 PetscInt *bi,*bj,*ajtmp; 270 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 271 PetscReal f; 272 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 273 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 274 PetscBT lnkbt; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 284 bi[0] = 0; 285 286 /* bdiag is location of diagonal in factor */ 287 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 288 bdiag[0] = 0; 289 290 /* linked list for storing column indices of the active row */ 291 nlnk = n + 1; 292 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 293 294 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 295 im = cols + n; 296 bi_ptr = (PetscInt**)(im + n); 297 298 /* initial FreeSpace size is f*(ai[n]+1) */ 299 f = info->fill; 300 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 301 current_space = free_space; 302 303 for (i=0; i<n; i++) { 304 /* copy previous fill into linked list */ 305 nzi = 0; 306 nnz = ai[r[i]+1] - ai[r[i]]; 307 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 308 ajtmp = aj + ai[r[i]]; 309 for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */ 310 ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 311 nzi += nlnk; 312 313 /* add pivot rows into linked list */ 314 row = lnk[n]; 315 while (row < i) { 316 nzbd = bdiag[row] - bi[row] + 1; 317 ajtmp = bi_ptr[row] + nzbd; 318 nnz = im[row] - nzbd; /* num of columns with row<indices<=i */ 319 im[row] = nzbd; 320 ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr); 321 nzi += nlnk; 322 im[row] += nzbd; /* update im[row]: num of cols with index<=i */ 323 324 row = lnk[row]; 325 } 326 327 bi[i+1] = bi[i] + nzi; 328 im[i] = nzi; 329 330 /* mark bdiag */ 331 nzbd = 0; 332 nnz = nzi; 333 k = lnk[n]; 334 while (nnz-- && k < i){ 335 nzbd++; 336 k = lnk[k]; 337 } 338 bdiag[i] = bi[i] + nzbd; 339 340 /* if free space is not available, make more free space */ 341 if (current_space->local_remaining<nzi) { 342 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 343 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 344 reallocs++; 345 } 346 347 /* copy data into free space, then initialize lnk */ 348 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 349 bi_ptr[i] = current_space->array; 350 current_space->array += nzi; 351 current_space->local_used += nzi; 352 current_space->local_remaining -= nzi; 353 } 354 #if defined(PETSC_USE_DEBUG) 355 if (ai[n] != 0) { 356 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 357 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 358 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af));CHKERRQ(ierr); 359 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 360 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 361 } else { 362 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr); 363 } 364 #endif 365 366 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 367 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 368 369 /* destroy list of free space and other temporary array(s) */ 370 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 371 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 372 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 373 ierr = PetscFree(cols);CHKERRQ(ierr); 374 375 /* put together the new matrix */ 376 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 377 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 378 ierr = MatSeqAIJSetPreallocation(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 379 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 380 b = (Mat_SeqAIJ*)(*B)->data; 381 ierr = PetscFree(b->imax);CHKERRQ(ierr); 382 b->freedata = PETSC_TRUE; 383 b->singlemalloc = PETSC_FALSE; 384 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 385 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 386 b->j = bj; 387 b->i = bi; 388 b->diag = bdiag; 389 b->ilen = 0; 390 b->imax = 0; 391 b->row = isrow; 392 b->col = iscol; 393 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 394 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 395 b->icol = isicol; 396 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 397 398 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 399 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 400 b->maxnz = b->nz = bi[n] ; 401 402 (*B)->factor = FACTOR_LU; 403 (*B)->info.factor_mallocs = reallocs; 404 (*B)->info.fill_ratio_given = f; 405 406 if (ai[n] != 0) { 407 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 408 } else { 409 (*B)->info.fill_ratio_needed = 0.0; 410 } 411 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 412 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 413 PetscFunctionReturn(0); 414 } 415 416 /* ----------------------------------------------------------- */ 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 419 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 420 { 421 Mat C=*B; 422 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 423 IS isrow = b->row,isicol = b->icol; 424 PetscErrorCode ierr; 425 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 426 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 427 PetscInt *diag_offset = b->diag,diag,*pj; 428 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 429 PetscScalar d; 430 PetscReal rs; 431 LUShift_Ctx sctx; 432 PetscInt newshift; 433 434 PetscFunctionBegin; 435 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 436 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 437 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 438 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 439 rtmps = rtmp; ics = ic; 440 441 if (!a->diag) { 442 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 443 } 444 /* if both shift schemes are chosen by user, only use info->shiftpd */ 445 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 446 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 447 PetscInt *aai = a->i,*ddiag = a->diag; 448 sctx.shift_top = 0; 449 for (i=0; i<n; i++) { 450 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 451 d = (a->a)[ddiag[i]]; 452 rs = -PetscAbsScalar(d) - PetscRealPart(d); 453 v = a->a+aai[i]; 454 nz = aai[i+1] - aai[i]; 455 for (j=0; j<nz; j++) 456 rs += PetscAbsScalar(v[j]); 457 if (rs>sctx.shift_top) sctx.shift_top = rs; 458 } 459 if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12; 460 sctx.shift_top *= 1.1; 461 sctx.nshift_max = 5; 462 sctx.shift_lo = 0.; 463 sctx.shift_hi = 1.; 464 } 465 466 sctx.shift_amount = 0; 467 sctx.nshift = 0; 468 do { 469 sctx.lushift = PETSC_FALSE; 470 for (i=0; i<n; i++){ 471 nz = bi[i+1] - bi[i]; 472 bjtmp = bj + bi[i]; 473 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 474 475 /* load in initial (unfactored row) */ 476 nz = a->i[r[i]+1] - a->i[r[i]]; 477 ajtmp = a->j + a->i[r[i]]; 478 v = a->a + a->i[r[i]]; 479 for (j=0; j<nz; j++) { 480 rtmp[ics[ajtmp[j]]] = v[j]; 481 } 482 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 483 484 row = *bjtmp++; 485 while (row < i) { 486 pc = rtmp + row; 487 if (*pc != 0.0) { 488 pv = b->a + diag_offset[row]; 489 pj = b->j + diag_offset[row] + 1; 490 multiplier = *pc / *pv++; 491 *pc = multiplier; 492 nz = bi[row+1] - diag_offset[row] - 1; 493 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 494 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 495 } 496 row = *bjtmp++; 497 } 498 /* finished row so stick it into b->a */ 499 pv = b->a + bi[i] ; 500 pj = b->j + bi[i] ; 501 nz = bi[i+1] - bi[i]; 502 diag = diag_offset[i] - bi[i]; 503 rs = 0.0; 504 for (j=0; j<nz; j++) { 505 pv[j] = rtmps[pj[j]]; 506 if (j != diag) rs += PetscAbsScalar(pv[j]); 507 } 508 509 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 510 sctx.rs = rs; 511 sctx.pv = pv[diag]; 512 ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 513 if (newshift == 1){ 514 break; /* sctx.shift_amount is updated */ 515 } else if (newshift == -1){ 516 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 517 } 518 } 519 520 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 521 /* 522 * if no shift in this attempt & shifting & started shifting & can refine, 523 * then try lower shift 524 */ 525 sctx.shift_hi = info->shift_fraction; 526 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 527 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 528 sctx.lushift = PETSC_TRUE; 529 sctx.nshift++; 530 } 531 } while (sctx.lushift); 532 533 /* invert diagonal entries for simplier triangular solves */ 534 for (i=0; i<n; i++) { 535 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 536 } 537 538 ierr = PetscFree(rtmp);CHKERRQ(ierr); 539 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 540 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 541 C->factor = FACTOR_LU; 542 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 543 C->assembled = PETSC_TRUE; 544 ierr = PetscLogFlops(C->n);CHKERRQ(ierr); 545 if (sctx.nshift){ 546 if (info->shiftnz) { 547 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 548 } else if (info->shiftpd) { 549 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr); 550 } 551 } 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 557 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 558 { 559 PetscFunctionBegin; 560 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 561 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 562 PetscFunctionReturn(0); 563 } 564 565 566 /* ----------------------------------------------------------- */ 567 #undef __FUNCT__ 568 #define __FUNCT__ "MatLUFactor_SeqAIJ" 569 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 570 { 571 PetscErrorCode ierr; 572 Mat C; 573 574 PetscFunctionBegin; 575 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 576 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 577 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 578 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 /* ----------------------------------------------------------- */ 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatSolve_SeqAIJ" 584 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 585 { 586 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 587 IS iscol = a->col,isrow = a->row; 588 PetscErrorCode ierr; 589 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 590 PetscInt nz,*rout,*cout; 591 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 592 593 PetscFunctionBegin; 594 if (!n) PetscFunctionReturn(0); 595 596 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 597 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 598 tmp = a->solve_work; 599 600 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 601 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 602 603 /* forward solve the lower triangular */ 604 tmp[0] = b[*r++]; 605 tmps = tmp; 606 for (i=1; i<n; i++) { 607 v = aa + ai[i] ; 608 vi = aj + ai[i] ; 609 nz = a->diag[i] - ai[i]; 610 sum = b[*r++]; 611 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 612 tmp[i] = sum; 613 } 614 615 /* backward solve the upper triangular */ 616 for (i=n-1; i>=0; i--){ 617 v = aa + a->diag[i] + 1; 618 vi = aj + a->diag[i] + 1; 619 nz = ai[i+1] - a->diag[i] - 1; 620 sum = tmp[i]; 621 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 622 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 623 } 624 625 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 626 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 627 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 628 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 629 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 630 PetscFunctionReturn(0); 631 } 632 633 /* ----------------------------------------------------------- */ 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 636 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 637 { 638 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 639 PetscErrorCode ierr; 640 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 641 PetscScalar *x,*b,*aa = a->a; 642 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 643 PetscInt adiag_i,i,*vi,nz,ai_i; 644 PetscScalar *v,sum; 645 #endif 646 647 PetscFunctionBegin; 648 if (!n) PetscFunctionReturn(0); 649 650 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 651 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 652 653 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 654 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 655 #else 656 /* forward solve the lower triangular */ 657 x[0] = b[0]; 658 for (i=1; i<n; i++) { 659 ai_i = ai[i]; 660 v = aa + ai_i; 661 vi = aj + ai_i; 662 nz = adiag[i] - ai_i; 663 sum = b[i]; 664 while (nz--) sum -= *v++ * x[*vi++]; 665 x[i] = sum; 666 } 667 668 /* backward solve the upper triangular */ 669 for (i=n-1; i>=0; i--){ 670 adiag_i = adiag[i]; 671 v = aa + adiag_i + 1; 672 vi = aj + adiag_i + 1; 673 nz = ai[i+1] - adiag_i - 1; 674 sum = x[i]; 675 while (nz--) sum -= *v++ * x[*vi++]; 676 x[i] = sum*aa[adiag_i]; 677 } 678 #endif 679 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 680 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 681 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 682 PetscFunctionReturn(0); 683 } 684 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 687 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 688 { 689 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 690 IS iscol = a->col,isrow = a->row; 691 PetscErrorCode ierr; 692 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 693 PetscInt nz,*rout,*cout; 694 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 695 696 PetscFunctionBegin; 697 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 698 699 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 700 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 701 tmp = a->solve_work; 702 703 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 704 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 705 706 /* forward solve the lower triangular */ 707 tmp[0] = b[*r++]; 708 for (i=1; i<n; i++) { 709 v = aa + ai[i] ; 710 vi = aj + ai[i] ; 711 nz = a->diag[i] - ai[i]; 712 sum = b[*r++]; 713 while (nz--) sum -= *v++ * tmp[*vi++ ]; 714 tmp[i] = sum; 715 } 716 717 /* backward solve the upper triangular */ 718 for (i=n-1; i>=0; i--){ 719 v = aa + a->diag[i] + 1; 720 vi = aj + a->diag[i] + 1; 721 nz = ai[i+1] - a->diag[i] - 1; 722 sum = tmp[i]; 723 while (nz--) sum -= *v++ * tmp[*vi++ ]; 724 tmp[i] = sum*aa[a->diag[i]]; 725 x[*c--] += tmp[i]; 726 } 727 728 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 729 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 730 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 731 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 732 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 733 734 PetscFunctionReturn(0); 735 } 736 /* -------------------------------------------------------------------*/ 737 #undef __FUNCT__ 738 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 739 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 740 { 741 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 742 IS iscol = a->col,isrow = a->row; 743 PetscErrorCode ierr; 744 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 745 PetscInt nz,*rout,*cout,*diag = a->diag; 746 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 747 748 PetscFunctionBegin; 749 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 750 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 751 tmp = a->solve_work; 752 753 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 754 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 755 756 /* copy the b into temp work space according to permutation */ 757 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 758 759 /* forward solve the U^T */ 760 for (i=0; i<n; i++) { 761 v = aa + diag[i] ; 762 vi = aj + diag[i] + 1; 763 nz = ai[i+1] - diag[i] - 1; 764 s1 = tmp[i]; 765 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 766 while (nz--) { 767 tmp[*vi++ ] -= (*v++)*s1; 768 } 769 tmp[i] = s1; 770 } 771 772 /* backward solve the L^T */ 773 for (i=n-1; i>=0; i--){ 774 v = aa + diag[i] - 1 ; 775 vi = aj + diag[i] - 1 ; 776 nz = diag[i] - ai[i]; 777 s1 = tmp[i]; 778 while (nz--) { 779 tmp[*vi-- ] -= (*v--)*s1; 780 } 781 } 782 783 /* copy tmp into x according to permutation */ 784 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 785 786 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 787 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 788 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 789 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 790 791 ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr); 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 797 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 798 { 799 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 800 IS iscol = a->col,isrow = a->row; 801 PetscErrorCode ierr; 802 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 803 PetscInt nz,*rout,*cout,*diag = a->diag; 804 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 805 806 PetscFunctionBegin; 807 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 808 809 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 810 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 811 tmp = a->solve_work; 812 813 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 814 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 815 816 /* copy the b into temp work space according to permutation */ 817 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 818 819 /* forward solve the U^T */ 820 for (i=0; i<n; i++) { 821 v = aa + diag[i] ; 822 vi = aj + diag[i] + 1; 823 nz = ai[i+1] - diag[i] - 1; 824 tmp[i] *= *v++; 825 while (nz--) { 826 tmp[*vi++ ] -= (*v++)*tmp[i]; 827 } 828 } 829 830 /* backward solve the L^T */ 831 for (i=n-1; i>=0; i--){ 832 v = aa + diag[i] - 1 ; 833 vi = aj + diag[i] - 1 ; 834 nz = diag[i] - ai[i]; 835 while (nz--) { 836 tmp[*vi-- ] -= (*v--)*tmp[i]; 837 } 838 } 839 840 /* copy tmp into x according to permutation */ 841 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 842 843 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 844 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 845 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 846 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 847 848 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 849 PetscFunctionReturn(0); 850 } 851 /* ----------------------------------------------------------------*/ 852 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 856 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 857 { 858 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 859 IS isicol; 860 PetscErrorCode ierr; 861 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 862 PetscInt *bi,*cols,nnz,*cols_lvl; 863 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 864 PetscInt i,levels,diagonal_fill; 865 PetscTruth col_identity,row_identity; 866 PetscReal f; 867 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 868 PetscBT lnkbt; 869 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 870 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 871 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 872 873 PetscFunctionBegin; 874 f = info->fill; 875 levels = (PetscInt)info->levels; 876 diagonal_fill = (PetscInt)info->diagonal_fill; 877 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 878 879 /* special case that simply copies fill pattern */ 880 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 881 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 882 if (!levels && row_identity && col_identity) { 883 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 884 (*fact)->factor = FACTOR_LU; 885 b = (Mat_SeqAIJ*)(*fact)->data; 886 if (!b->diag) { 887 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 888 } 889 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 890 b->row = isrow; 891 b->col = iscol; 892 b->icol = isicol; 893 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 894 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 895 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 896 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 897 PetscFunctionReturn(0); 898 } 899 900 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 901 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 902 903 /* get new row pointers */ 904 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 905 bi[0] = 0; 906 /* bdiag is location of diagonal in factor */ 907 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 908 bdiag[0] = 0; 909 910 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 911 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 912 913 /* create a linked list for storing column indices of the active row */ 914 nlnk = n + 1; 915 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 916 917 /* initial FreeSpace size is f*(ai[n]+1) */ 918 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 919 current_space = free_space; 920 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 921 current_space_lvl = free_space_lvl; 922 923 for (i=0; i<n; i++) { 924 nzi = 0; 925 /* copy current row into linked list */ 926 nnz = ai[r[i]+1] - ai[r[i]]; 927 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 928 cols = aj + ai[r[i]]; 929 lnk[i] = -1; /* marker to indicate if diagonal exists */ 930 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 931 nzi += nlnk; 932 933 /* make sure diagonal entry is included */ 934 if (diagonal_fill && lnk[i] == -1) { 935 fm = n; 936 while (lnk[fm] < i) fm = lnk[fm]; 937 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 938 lnk[fm] = i; 939 lnk_lvl[i] = 0; 940 nzi++; dcount++; 941 } 942 943 /* add pivot rows into the active row */ 944 nzbd = 0; 945 prow = lnk[n]; 946 while (prow < i) { 947 nnz = bdiag[prow]; 948 cols = bj_ptr[prow] + nnz + 1; 949 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 950 nnz = bi[prow+1] - bi[prow] - nnz - 1; 951 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 952 nzi += nlnk; 953 prow = lnk[prow]; 954 nzbd++; 955 } 956 bdiag[i] = nzbd; 957 bi[i+1] = bi[i] + nzi; 958 959 /* if free space is not available, make more free space */ 960 if (current_space->local_remaining<nzi) { 961 nnz = nzi*(n - i); /* estimated and max additional space needed */ 962 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 963 ierr = GetMoreSpace(nnz,¤t_space_lvl);CHKERRQ(ierr); 964 reallocs++; 965 } 966 967 /* copy data into free_space and free_space_lvl, then initialize lnk */ 968 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 969 bj_ptr[i] = current_space->array; 970 bjlvl_ptr[i] = current_space_lvl->array; 971 972 /* make sure the active row i has diagonal entry */ 973 if (*(bj_ptr[i]+bdiag[i]) != i) { 974 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 975 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 976 } 977 978 current_space->array += nzi; 979 current_space->local_used += nzi; 980 current_space->local_remaining -= nzi; 981 current_space_lvl->array += nzi; 982 current_space_lvl->local_used += nzi; 983 current_space_lvl->local_remaining -= nzi; 984 } 985 986 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 987 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 988 989 /* destroy list of free space and other temporary arrays */ 990 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 991 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 992 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 993 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 994 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 995 996 #if defined(PETSC_USE_DEBUG) 997 { 998 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 999 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 1000 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 1001 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr); 1002 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 1003 if (diagonal_fill) { 1004 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr); 1005 } 1006 } 1007 #endif 1008 1009 /* put together the new matrix */ 1010 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1011 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1012 ierr = MatSeqAIJSetPreallocation(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1013 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1014 b = (Mat_SeqAIJ*)(*fact)->data; 1015 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1016 b->freedata = PETSC_TRUE; 1017 b->singlemalloc = PETSC_FALSE; 1018 len = (bi[n] )*sizeof(PetscScalar); 1019 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1020 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1021 b->j = bj; 1022 b->i = bi; 1023 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1024 b->diag = bdiag; 1025 b->ilen = 0; 1026 b->imax = 0; 1027 b->row = isrow; 1028 b->col = iscol; 1029 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1030 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1031 b->icol = isicol; 1032 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1033 /* In b structure: Free imax, ilen, old a, old j. 1034 Allocate bdiag, solve_work, new a, new j */ 1035 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1036 b->maxnz = b->nz = bi[n] ; 1037 (*fact)->factor = FACTOR_LU; 1038 (*fact)->info.factor_mallocs = reallocs; 1039 (*fact)->info.fill_ratio_given = f; 1040 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1041 1042 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1043 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1044 1045 PetscFunctionReturn(0); 1046 } 1047 1048 #include "src/mat/impls/sbaij/seq/sbaij.h" 1049 #undef __FUNCT__ 1050 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1051 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1052 { 1053 Mat C = *B; 1054 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1055 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1056 IS ip=b->row; 1057 PetscErrorCode ierr; 1058 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1059 PetscInt *ai=a->i,*aj=a->j; 1060 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1061 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1062 PetscReal zeropivot,rs,shiftnz; 1063 PetscTruth shiftpd; 1064 ChShift_Ctx sctx; 1065 PetscInt newshift; 1066 1067 PetscFunctionBegin; 1068 shiftnz = info->shiftnz; 1069 shiftpd = info->shiftpd; 1070 zeropivot = info->zeropivot; 1071 1072 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1073 1074 /* initialization */ 1075 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1076 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1077 jl = il + mbs; 1078 rtmp = (MatScalar*)(jl + mbs); 1079 1080 sctx.shift_amount = 0; 1081 sctx.nshift = 0; 1082 do { 1083 sctx.chshift = PETSC_FALSE; 1084 for (i=0; i<mbs; i++) { 1085 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1086 } 1087 1088 for (k = 0; k<mbs; k++){ 1089 bval = ba + bi[k]; 1090 /* initialize k-th row by the perm[k]-th row of A */ 1091 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1092 for (j = jmin; j < jmax; j++){ 1093 col = rip[aj[j]]; 1094 if (col >= k){ /* only take upper triangular entry */ 1095 rtmp[col] = aa[j]; 1096 *bval++ = 0.0; /* for in-place factorization */ 1097 } 1098 } 1099 /* shift the diagonal of the matrix */ 1100 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1101 1102 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1103 dk = rtmp[k]; 1104 i = jl[k]; /* first row to be added to k_th row */ 1105 1106 while (i < k){ 1107 nexti = jl[i]; /* next row to be added to k_th row */ 1108 1109 /* compute multiplier, update diag(k) and U(i,k) */ 1110 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1111 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1112 dk += uikdi*ba[ili]; 1113 ba[ili] = uikdi; /* -U(i,k) */ 1114 1115 /* add multiple of row i to k-th row */ 1116 jmin = ili + 1; jmax = bi[i+1]; 1117 if (jmin < jmax){ 1118 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1119 /* update il and jl for row i */ 1120 il[i] = jmin; 1121 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1122 } 1123 i = nexti; 1124 } 1125 1126 /* shift the diagonals when zero pivot is detected */ 1127 /* compute rs=sum of abs(off-diagonal) */ 1128 rs = 0.0; 1129 jmin = bi[k]+1; 1130 nz = bi[k+1] - jmin; 1131 if (nz){ 1132 bcol = bj + jmin; 1133 while (nz--){ 1134 rs += PetscAbsScalar(rtmp[*bcol]); 1135 bcol++; 1136 } 1137 } 1138 1139 sctx.rs = rs; 1140 sctx.pv = dk; 1141 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1142 if (newshift == 1){ 1143 break; /* sctx.shift_amount is updated */ 1144 } else if (newshift == -1){ 1145 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1146 } 1147 1148 /* copy data into U(k,:) */ 1149 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1150 jmin = bi[k]+1; jmax = bi[k+1]; 1151 if (jmin < jmax) { 1152 for (j=jmin; j<jmax; j++){ 1153 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1154 } 1155 /* add the k-th row into il and jl */ 1156 il[k] = jmin; 1157 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1158 } 1159 } 1160 } while (sctx.chshift); 1161 ierr = PetscFree(il);CHKERRQ(ierr); 1162 1163 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1164 C->factor = FACTOR_CHOLESKY; 1165 C->assembled = PETSC_TRUE; 1166 C->preallocated = PETSC_TRUE; 1167 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1168 if (sctx.nshift){ 1169 if (shiftnz) { 1170 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1171 } else if (shiftpd) { 1172 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1173 } 1174 } 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1180 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1181 { 1182 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1183 Mat_SeqSBAIJ *b; 1184 Mat B; 1185 PetscErrorCode ierr; 1186 PetscTruth perm_identity; 1187 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1188 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1189 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1190 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1191 PetscReal fill=info->fill,levels=info->levels; 1192 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1193 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1194 PetscBT lnkbt; 1195 1196 PetscFunctionBegin; 1197 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1198 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1199 1200 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1201 ui[0] = 0; 1202 1203 /* special case that simply copies fill pattern */ 1204 if (!levels && perm_identity) { 1205 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1206 for (i=0; i<am; i++) { 1207 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1208 } 1209 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1210 cols = uj; 1211 for (i=0; i<am; i++) { 1212 aj = a->j + a->diag[i]; 1213 ncols = ui[i+1] - ui[i]; 1214 for (j=0; j<ncols; j++) *cols++ = *aj++; 1215 } 1216 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1217 /* initialization */ 1218 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1219 1220 /* jl: linked list for storing indices of the pivot rows 1221 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1222 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1223 il = jl + am; 1224 uj_ptr = (PetscInt**)(il + am); 1225 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1226 for (i=0; i<am; i++){ 1227 jl[i] = am; il[i] = 0; 1228 } 1229 1230 /* create and initialize a linked list for storing column indices of the active row k */ 1231 nlnk = am + 1; 1232 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1233 1234 /* initial FreeSpace size is fill*(ai[am]+1) */ 1235 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1236 current_space = free_space; 1237 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1238 current_space_lvl = free_space_lvl; 1239 1240 for (k=0; k<am; k++){ /* for each active row k */ 1241 /* initialize lnk by the column indices of row rip[k] of A */ 1242 nzk = 0; 1243 ncols = ai[rip[k]+1] - ai[rip[k]]; 1244 ncols_upper = 0; 1245 for (j=0; j<ncols; j++){ 1246 i = *(aj + ai[rip[k]] + j); 1247 if (rip[i] >= k){ /* only take upper triangular entry */ 1248 ajtmp[ncols_upper] = i; 1249 ncols_upper++; 1250 } 1251 } 1252 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1253 nzk += nlnk; 1254 1255 /* update lnk by computing fill-in for each pivot row to be merged in */ 1256 prow = jl[k]; /* 1st pivot row */ 1257 1258 while (prow < k){ 1259 nextprow = jl[prow]; 1260 1261 /* merge prow into k-th row */ 1262 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1263 jmax = ui[prow+1]; 1264 ncols = jmax-jmin; 1265 i = jmin - ui[prow]; 1266 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1267 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1268 j = *(uj - 1); 1269 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1270 nzk += nlnk; 1271 1272 /* update il and jl for prow */ 1273 if (jmin < jmax){ 1274 il[prow] = jmin; 1275 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1276 } 1277 prow = nextprow; 1278 } 1279 1280 /* if free space is not available, make more free space */ 1281 if (current_space->local_remaining<nzk) { 1282 i = am - k + 1; /* num of unfactored rows */ 1283 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1284 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1285 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1286 reallocs++; 1287 } 1288 1289 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1290 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1291 1292 /* add the k-th row into il and jl */ 1293 if (nzk > 1){ 1294 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1295 jl[k] = jl[i]; jl[i] = k; 1296 il[k] = ui[k] + 1; 1297 } 1298 uj_ptr[k] = current_space->array; 1299 uj_lvl_ptr[k] = current_space_lvl->array; 1300 1301 current_space->array += nzk; 1302 current_space->local_used += nzk; 1303 current_space->local_remaining -= nzk; 1304 1305 current_space_lvl->array += nzk; 1306 current_space_lvl->local_used += nzk; 1307 current_space_lvl->local_remaining -= nzk; 1308 1309 ui[k+1] = ui[k] + nzk; 1310 } 1311 1312 #if defined(PETSC_USE_DEBUG) 1313 if (ai[am] != 0) { 1314 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1315 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1316 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1317 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1318 } else { 1319 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1320 } 1321 #endif 1322 1323 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1324 ierr = PetscFree(jl);CHKERRQ(ierr); 1325 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1326 1327 /* destroy list of free space and other temporary array(s) */ 1328 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1329 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1330 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1331 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1332 1333 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1334 1335 /* put together the new matrix in MATSEQSBAIJ format */ 1336 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1337 B = *fact; 1338 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1339 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1340 1341 b = (Mat_SeqSBAIJ*)B->data; 1342 /* the next line frees the default space generated by the Create() */ 1343 ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr); 1344 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1345 b->singlemalloc = PETSC_FALSE; 1346 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1347 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1348 b->j = uj; 1349 b->i = ui; 1350 b->diag = 0; 1351 b->ilen = 0; 1352 b->imax = 0; 1353 b->row = perm; 1354 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1355 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1356 b->icol = perm; 1357 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1358 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1359 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1360 b->maxnz = b->nz = ui[am]; 1361 1362 B->factor = FACTOR_CHOLESKY; 1363 B->info.factor_mallocs = reallocs; 1364 B->info.fill_ratio_given = fill; 1365 if (ai[am] != 0) { 1366 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1367 } else { 1368 B->info.fill_ratio_needed = 0.0; 1369 } 1370 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1371 if (perm_identity){ 1372 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1373 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1374 } 1375 PetscFunctionReturn(0); 1376 } 1377 1378 #undef __FUNCT__ 1379 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1380 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1381 { 1382 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1383 Mat_SeqSBAIJ *b; 1384 Mat B; 1385 PetscErrorCode ierr; 1386 PetscTruth perm_identity; 1387 PetscReal fill = info->fill; 1388 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1389 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1390 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1391 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1392 PetscBT lnkbt; 1393 IS iperm; 1394 1395 PetscFunctionBegin; 1396 /* check whether perm is the identity mapping */ 1397 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1398 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1399 1400 if (!perm_identity){ 1401 /* check if perm is symmetric! */ 1402 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1403 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1404 for (i=0; i<am; i++) { 1405 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1406 } 1407 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1408 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1409 } 1410 1411 /* initialization */ 1412 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1413 ui[0] = 0; 1414 1415 /* jl: linked list for storing indices of the pivot rows 1416 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1417 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1418 il = jl + am; 1419 cols = il + am; 1420 ui_ptr = (PetscInt**)(cols + am); 1421 for (i=0; i<am; i++){ 1422 jl[i] = am; il[i] = 0; 1423 } 1424 1425 /* create and initialize a linked list for storing column indices of the active row k */ 1426 nlnk = am + 1; 1427 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1428 1429 /* initial FreeSpace size is fill*(ai[am]+1) */ 1430 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1431 current_space = free_space; 1432 1433 for (k=0; k<am; k++){ /* for each active row k */ 1434 /* initialize lnk by the column indices of row rip[k] of A */ 1435 nzk = 0; 1436 ncols = ai[rip[k]+1] - ai[rip[k]]; 1437 ncols_upper = 0; 1438 for (j=0; j<ncols; j++){ 1439 i = rip[*(aj + ai[rip[k]] + j)]; 1440 if (i >= k){ /* only take upper triangular entry */ 1441 cols[ncols_upper] = i; 1442 ncols_upper++; 1443 } 1444 } 1445 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1446 nzk += nlnk; 1447 1448 /* update lnk by computing fill-in for each pivot row to be merged in */ 1449 prow = jl[k]; /* 1st pivot row */ 1450 1451 while (prow < k){ 1452 nextprow = jl[prow]; 1453 /* merge prow into k-th row */ 1454 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1455 jmax = ui[prow+1]; 1456 ncols = jmax-jmin; 1457 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1458 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1459 nzk += nlnk; 1460 1461 /* update il and jl for prow */ 1462 if (jmin < jmax){ 1463 il[prow] = jmin; 1464 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1465 } 1466 prow = nextprow; 1467 } 1468 1469 /* if free space is not available, make more free space */ 1470 if (current_space->local_remaining<nzk) { 1471 i = am - k + 1; /* num of unfactored rows */ 1472 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1473 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1474 reallocs++; 1475 } 1476 1477 /* copy data into free space, then initialize lnk */ 1478 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1479 1480 /* add the k-th row into il and jl */ 1481 if (nzk-1 > 0){ 1482 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1483 jl[k] = jl[i]; jl[i] = k; 1484 il[k] = ui[k] + 1; 1485 } 1486 ui_ptr[k] = current_space->array; 1487 current_space->array += nzk; 1488 current_space->local_used += nzk; 1489 current_space->local_remaining -= nzk; 1490 1491 ui[k+1] = ui[k] + nzk; 1492 } 1493 1494 #if defined(PETSC_USE_DEBUG) 1495 if (ai[am] != 0) { 1496 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1497 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1498 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1499 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1500 } else { 1501 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1502 } 1503 #endif 1504 1505 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1506 ierr = PetscFree(jl);CHKERRQ(ierr); 1507 1508 /* destroy list of free space and other temporary array(s) */ 1509 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1510 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1511 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1512 1513 /* put together the new matrix in MATSEQSBAIJ format */ 1514 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1515 B = *fact; 1516 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1517 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1518 1519 b = (Mat_SeqSBAIJ*)B->data; 1520 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1521 b->singlemalloc = PETSC_FALSE; 1522 /* the next line frees the default space generated by the Create() */ 1523 ierr = PetscFree3(b->a,b->j,b->i);CHKERRQ(ierr); 1524 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1525 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1526 b->j = uj; 1527 b->i = ui; 1528 b->diag = 0; 1529 b->ilen = 0; 1530 b->imax = 0; 1531 b->row = perm; 1532 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1533 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1534 b->icol = perm; 1535 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1536 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1537 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1538 b->maxnz = b->nz = ui[am]; 1539 1540 B->factor = FACTOR_CHOLESKY; 1541 B->info.factor_mallocs = reallocs; 1542 B->info.fill_ratio_given = fill; 1543 if (ai[am] != 0) { 1544 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1545 } else { 1546 B->info.fill_ratio_needed = 0.0; 1547 } 1548 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1549 if (perm_identity){ 1550 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1551 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1552 } 1553 PetscFunctionReturn(0); 1554 } 1555