1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 #include "petscbt.h" 6 #include "src/mat/utils/freespace.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 10 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 11 { 12 PetscFunctionBegin; 13 14 SETERRQ(PETSC_ERR_SUP,"Code not written"); 15 #if !defined(PETSC_USE_DEBUG) 16 PetscFunctionReturn(0); 17 #endif 18 } 19 20 21 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 22 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 26 27 #undef __FUNCT__ 28 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 29 /* ------------------------------------------------------------ 30 31 This interface was contribed by Tony Caola 32 33 This routine is an interface to the pivoting drop-tolerance 34 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 35 SPARSEKIT2. 36 37 The SPARSEKIT2 routines used here are covered by the GNU 38 copyright; see the file gnu in this directory. 39 40 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 41 help in getting this routine ironed out. 42 43 The major drawback to this routine is that if info->fill is 44 not large enough it fails rather than allocating more space; 45 this can be fixed by hacking/improving the f2c version of 46 Yousef Saad's code. 47 48 ------------------------------------------------------------ 49 */ 50 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 51 { 52 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 53 PetscFunctionBegin; 54 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 55 You can obtain the drop tolerance routines by installing PETSc from\n\ 56 www.mcs.anl.gov/petsc\n"); 57 #else 58 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 59 IS iscolf,isicol,isirow; 60 PetscTruth reorder; 61 PetscErrorCode ierr,sierr; 62 PetscInt *c,*r,*ic,i,n = A->m; 63 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 64 PetscInt *ordcol,*iwk,*iperm,*jw; 65 PetscInt jmax,lfill,job,*o_i,*o_j; 66 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 67 PetscReal af; 68 69 PetscFunctionBegin; 70 71 if (info->dt == PETSC_DEFAULT) info->dt = .005; 72 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 73 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 74 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 75 lfill = (PetscInt)(info->dtcount/2.0); 76 jmax = (PetscInt)(info->fill*a->nz); 77 78 79 /* ------------------------------------------------------------ 80 If reorder=.TRUE., then the original matrix has to be 81 reordered to reflect the user selected ordering scheme, and 82 then de-reordered so it is in it's original format. 83 Because Saad's dperm() is NOT in place, we have to copy 84 the original matrix and allocate more storage. . . 85 ------------------------------------------------------------ 86 */ 87 88 /* set reorder to true if either isrow or iscol is not identity */ 89 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 90 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 91 reorder = PetscNot(reorder); 92 93 94 /* storage for ilu factor */ 95 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 98 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 99 100 /* ------------------------------------------------------------ 101 Make sure that everything is Fortran formatted (1-Based) 102 ------------------------------------------------------------ 103 */ 104 for (i=old_i[0];i<old_i[n];i++) { 105 old_j[i]++; 106 } 107 for(i=0;i<n+1;i++) { 108 old_i[i]++; 109 }; 110 111 112 if (reorder) { 113 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 114 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 115 for(i=0;i<n;i++) { 116 r[i] = r[i]+1; 117 c[i] = c[i]+1; 118 } 119 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 122 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 123 for (i=0;i<n;i++) { 124 r[i] = r[i]-1; 125 c[i] = c[i]-1; 126 } 127 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 128 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 148 if (sierr) { 149 switch (sierr) { 150 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 152 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 155 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 222 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 223 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 224 (*fact)->factor = FACTOR_LU; 225 (*fact)->assembled = PETSC_TRUE; 226 227 b = (Mat_SeqAIJ*)(*fact)->data; 228 ierr = PetscFree(b->imax);CHKERRQ(ierr); 229 b->sorted = PETSC_FALSE; 230 b->singlemalloc = PETSC_FALSE; 231 /* the next line frees the default space generated by the MatCreate() */ 232 ierr = PetscFree(b->a);CHKERRQ(ierr); 233 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 234 b->a = new_a; 235 b->j = new_j; 236 b->i = new_i; 237 b->ilen = 0; 238 b->imax = 0; 239 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 240 b->row = isirow; 241 b->col = iscolf; 242 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 243 b->maxnz = b->nz = new_i[n]; 244 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 245 (*fact)->info.factor_mallocs = 0; 246 247 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 248 249 af = ((double)b->nz)/((double)a->nz) + .001; 250 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 251 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 254 255 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 #undef __FUNCT__ 262 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 263 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 264 { 265 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 266 IS isicol; 267 PetscErrorCode ierr; 268 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 269 PetscInt *bi,*bj,*ajtmp; 270 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 271 PetscReal f; 272 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 273 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 274 PetscBT lnkbt; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 284 bi[0] = 0; 285 286 /* bdiag is location of diagonal in factor */ 287 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 288 bdiag[0] = 0; 289 290 /* linked list for storing column indices of the active row */ 291 nlnk = n + 1; 292 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 293 294 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 295 im = cols + n; 296 bi_ptr = (PetscInt**)(im + n); 297 298 /* initial FreeSpace size is f*(ai[n]+1) */ 299 f = info->fill; 300 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 301 current_space = free_space; 302 303 for (i=0; i<n; i++) { 304 /* copy previous fill into linked list */ 305 nzi = 0; 306 nnz = ai[r[i]+1] - ai[r[i]]; 307 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 308 ajtmp = aj + ai[r[i]]; 309 for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */ 310 ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 311 nzi += nlnk; 312 313 /* add pivot rows into linked list */ 314 row = lnk[n]; 315 while (row < i) { 316 nzbd = bdiag[row] - bi[row] + 1; 317 ajtmp = bi_ptr[row] + nzbd; 318 nnz = im[row] - nzbd; /* num of columns with row<indices<=i */ 319 im[row] = nzbd; 320 ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr); 321 nzi += nlnk; 322 im[row] += nzbd; /* update im[row]: num of cols with index<=i */ 323 324 row = lnk[row]; 325 } 326 327 bi[i+1] = bi[i] + nzi; 328 im[i] = nzi; 329 330 /* mark bdiag */ 331 nzbd = 0; 332 nnz = nzi; 333 k = lnk[n]; 334 while (nnz-- && k < i){ 335 nzbd++; 336 k = lnk[k]; 337 } 338 bdiag[i] = bi[i] + nzbd; 339 340 /* if free space is not available, make more free space */ 341 if (current_space->local_remaining<nzi) { 342 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 343 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 344 reallocs++; 345 } 346 347 /* copy data into free space, then initialize lnk */ 348 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 349 bi_ptr[i] = current_space->array; 350 current_space->array += nzi; 351 current_space->local_used += nzi; 352 current_space->local_remaining -= nzi; 353 } 354 if (ai[n] != 0) { 355 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 356 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 357 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 358 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 359 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 360 } else { 361 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 362 } 363 364 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 365 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 366 367 /* destroy list of free space and other temporary array(s) */ 368 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 369 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 370 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 371 ierr = PetscFree(cols);CHKERRQ(ierr); 372 373 /* put together the new matrix */ 374 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 375 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 376 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 377 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 378 b = (Mat_SeqAIJ*)(*B)->data; 379 ierr = PetscFree(b->imax);CHKERRQ(ierr); 380 b->singlemalloc = PETSC_FALSE; 381 /* the next line frees the default space generated by the Create() */ 382 ierr = PetscFree(b->a);CHKERRQ(ierr); 383 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 384 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 385 b->j = bj; 386 b->i = bi; 387 b->diag = bdiag; 388 b->ilen = 0; 389 b->imax = 0; 390 b->row = isrow; 391 b->col = iscol; 392 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 393 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 394 b->icol = isicol; 395 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 396 397 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 398 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 399 b->maxnz = b->nz = bi[n] ; 400 401 (*B)->factor = FACTOR_LU; 402 (*B)->info.factor_mallocs = reallocs; 403 (*B)->info.fill_ratio_given = f; 404 405 if (ai[n] != 0) { 406 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 407 } else { 408 (*B)->info.fill_ratio_needed = 0.0; 409 } 410 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 411 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 412 PetscFunctionReturn(0); 413 } 414 415 /* ----------------------------------------------------------- */ 416 #undef __FUNCT__ 417 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 418 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 419 { 420 Mat C=*B; 421 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 422 IS isrow = b->row,isicol = b->icol; 423 PetscErrorCode ierr; 424 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 425 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 426 PetscInt *diag_offset = b->diag,diag,*pj; 427 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 428 PetscReal rs,d; 429 LUShift_Ctx sctx; 430 PetscInt newshift; 431 432 PetscFunctionBegin; 433 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 434 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 435 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 436 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 437 rtmps = rtmp; ics = ic; 438 439 if (!a->diag) { 440 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 441 } 442 /* if both shift schemes are chosen by user, only use info->shiftpd */ 443 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 444 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 445 PetscInt *aai = a->i,*ddiag = a->diag; 446 sctx.shift_top = 0; 447 for (i=0; i<n; i++) { 448 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 449 d = (a->a)[ddiag[i]]; 450 rs = -PetscAbsScalar(d) - PetscRealPart(d); 451 v = a->a+aai[i]; 452 nz = aai[i+1] - aai[i]; 453 for (j=0; j<nz; j++) 454 rs += PetscAbsScalar(v[j]); 455 if (rs>sctx.shift_top) sctx.shift_top = rs; 456 } 457 if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12; 458 sctx.shift_top *= 1.1; 459 sctx.nshift_max = 5; 460 sctx.shift_lo = 0.; 461 sctx.shift_hi = 1.; 462 } 463 464 sctx.shift_amount = 0; 465 sctx.nshift = 0; 466 do { 467 sctx.lushift = PETSC_FALSE; 468 for (i=0; i<n; i++){ 469 nz = bi[i+1] - bi[i]; 470 bjtmp = bj + bi[i]; 471 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 472 473 /* load in initial (unfactored row) */ 474 nz = a->i[r[i]+1] - a->i[r[i]]; 475 ajtmp = a->j + a->i[r[i]]; 476 v = a->a + a->i[r[i]]; 477 for (j=0; j<nz; j++) { 478 rtmp[ics[ajtmp[j]]] = v[j]; 479 } 480 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 481 482 row = *bjtmp++; 483 while (row < i) { 484 pc = rtmp + row; 485 if (*pc != 0.0) { 486 pv = b->a + diag_offset[row]; 487 pj = b->j + diag_offset[row] + 1; 488 multiplier = *pc / *pv++; 489 *pc = multiplier; 490 nz = bi[row+1] - diag_offset[row] - 1; 491 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 492 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 493 } 494 row = *bjtmp++; 495 } 496 /* finished row so stick it into b->a */ 497 pv = b->a + bi[i] ; 498 pj = b->j + bi[i] ; 499 nz = bi[i+1] - bi[i]; 500 diag = diag_offset[i] - bi[i]; 501 rs = 0.0; 502 for (j=0; j<nz; j++) { 503 pv[j] = rtmps[pj[j]]; 504 if (j != diag) rs += PetscAbsScalar(pv[j]); 505 } 506 507 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 508 sctx.rs = rs; 509 sctx.pv = pv[diag]; 510 ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 511 if (newshift == 1){ 512 break; /* sctx.shift_amount is updated */ 513 } else if (newshift == -1){ 514 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 515 } 516 } 517 518 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 519 /* 520 * if no shift in this attempt & shifting & started shifting & can refine, 521 * then try lower shift 522 */ 523 sctx.shift_hi = info->shift_fraction; 524 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 525 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 526 sctx.lushift = PETSC_TRUE; 527 sctx.nshift++; 528 } 529 } while (sctx.lushift); 530 531 /* invert diagonal entries for simplier triangular solves */ 532 for (i=0; i<n; i++) { 533 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 534 } 535 536 ierr = PetscFree(rtmp);CHKERRQ(ierr); 537 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 538 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 539 C->factor = FACTOR_LU; 540 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 541 C->assembled = PETSC_TRUE; 542 ierr = PetscLogFlops(C->n);CHKERRQ(ierr); 543 if (sctx.nshift){ 544 if (info->shiftnz) { 545 PetscLogInfo(0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 546 } else if (info->shiftpd) { 547 PetscLogInfo(0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top); 548 } 549 } 550 PetscFunctionReturn(0); 551 } 552 553 #undef __FUNCT__ 554 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 555 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 556 { 557 PetscFunctionBegin; 558 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 559 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 560 PetscFunctionReturn(0); 561 } 562 563 564 /* ----------------------------------------------------------- */ 565 #undef __FUNCT__ 566 #define __FUNCT__ "MatLUFactor_SeqAIJ" 567 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 568 { 569 PetscErrorCode ierr; 570 Mat C; 571 572 PetscFunctionBegin; 573 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 574 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 575 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 576 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 577 PetscFunctionReturn(0); 578 } 579 /* ----------------------------------------------------------- */ 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatSolve_SeqAIJ" 582 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 583 { 584 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 585 IS iscol = a->col,isrow = a->row; 586 PetscErrorCode ierr; 587 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 588 PetscInt nz,*rout,*cout; 589 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 590 591 PetscFunctionBegin; 592 if (!n) PetscFunctionReturn(0); 593 594 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 595 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 596 tmp = a->solve_work; 597 598 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 599 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 600 601 /* forward solve the lower triangular */ 602 tmp[0] = b[*r++]; 603 tmps = tmp; 604 for (i=1; i<n; i++) { 605 v = aa + ai[i] ; 606 vi = aj + ai[i] ; 607 nz = a->diag[i] - ai[i]; 608 sum = b[*r++]; 609 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 610 tmp[i] = sum; 611 } 612 613 /* backward solve the upper triangular */ 614 for (i=n-1; i>=0; i--){ 615 v = aa + a->diag[i] + 1; 616 vi = aj + a->diag[i] + 1; 617 nz = ai[i+1] - a->diag[i] - 1; 618 sum = tmp[i]; 619 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 620 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 621 } 622 623 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 624 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 625 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 626 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 627 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 /* ----------------------------------------------------------- */ 632 #undef __FUNCT__ 633 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 634 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 635 { 636 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 637 PetscErrorCode ierr; 638 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 639 PetscScalar *x,*b,*aa = a->a; 640 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 641 PetscInt adiag_i,i,*vi,nz,ai_i; 642 PetscScalar *v,sum; 643 #endif 644 645 PetscFunctionBegin; 646 if (!n) PetscFunctionReturn(0); 647 648 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 649 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 650 651 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 652 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 653 #else 654 /* forward solve the lower triangular */ 655 x[0] = b[0]; 656 for (i=1; i<n; i++) { 657 ai_i = ai[i]; 658 v = aa + ai_i; 659 vi = aj + ai_i; 660 nz = adiag[i] - ai_i; 661 sum = b[i]; 662 while (nz--) sum -= *v++ * x[*vi++]; 663 x[i] = sum; 664 } 665 666 /* backward solve the upper triangular */ 667 for (i=n-1; i>=0; i--){ 668 adiag_i = adiag[i]; 669 v = aa + adiag_i + 1; 670 vi = aj + adiag_i + 1; 671 nz = ai[i+1] - adiag_i - 1; 672 sum = x[i]; 673 while (nz--) sum -= *v++ * x[*vi++]; 674 x[i] = sum*aa[adiag_i]; 675 } 676 #endif 677 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 678 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 679 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 680 PetscFunctionReturn(0); 681 } 682 683 #undef __FUNCT__ 684 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 685 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 686 { 687 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 688 IS iscol = a->col,isrow = a->row; 689 PetscErrorCode ierr; 690 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 691 PetscInt nz,*rout,*cout; 692 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 693 694 PetscFunctionBegin; 695 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 696 697 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 698 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 699 tmp = a->solve_work; 700 701 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 702 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 703 704 /* forward solve the lower triangular */ 705 tmp[0] = b[*r++]; 706 for (i=1; i<n; i++) { 707 v = aa + ai[i] ; 708 vi = aj + ai[i] ; 709 nz = a->diag[i] - ai[i]; 710 sum = b[*r++]; 711 while (nz--) sum -= *v++ * tmp[*vi++ ]; 712 tmp[i] = sum; 713 } 714 715 /* backward solve the upper triangular */ 716 for (i=n-1; i>=0; i--){ 717 v = aa + a->diag[i] + 1; 718 vi = aj + a->diag[i] + 1; 719 nz = ai[i+1] - a->diag[i] - 1; 720 sum = tmp[i]; 721 while (nz--) sum -= *v++ * tmp[*vi++ ]; 722 tmp[i] = sum*aa[a->diag[i]]; 723 x[*c--] += tmp[i]; 724 } 725 726 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 727 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 728 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 729 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 730 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 731 732 PetscFunctionReturn(0); 733 } 734 /* -------------------------------------------------------------------*/ 735 #undef __FUNCT__ 736 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 737 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 738 { 739 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 740 IS iscol = a->col,isrow = a->row; 741 PetscErrorCode ierr; 742 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 743 PetscInt nz,*rout,*cout,*diag = a->diag; 744 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 745 746 PetscFunctionBegin; 747 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 748 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 749 tmp = a->solve_work; 750 751 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 752 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 753 754 /* copy the b into temp work space according to permutation */ 755 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 756 757 /* forward solve the U^T */ 758 for (i=0; i<n; i++) { 759 v = aa + diag[i] ; 760 vi = aj + diag[i] + 1; 761 nz = ai[i+1] - diag[i] - 1; 762 s1 = tmp[i]; 763 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 764 while (nz--) { 765 tmp[*vi++ ] -= (*v++)*s1; 766 } 767 tmp[i] = s1; 768 } 769 770 /* backward solve the L^T */ 771 for (i=n-1; i>=0; i--){ 772 v = aa + diag[i] - 1 ; 773 vi = aj + diag[i] - 1 ; 774 nz = diag[i] - ai[i]; 775 s1 = tmp[i]; 776 while (nz--) { 777 tmp[*vi-- ] -= (*v--)*s1; 778 } 779 } 780 781 /* copy tmp into x according to permutation */ 782 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 783 784 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 785 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 786 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 787 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 788 789 ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr); 790 PetscFunctionReturn(0); 791 } 792 793 #undef __FUNCT__ 794 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 795 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 796 { 797 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 798 IS iscol = a->col,isrow = a->row; 799 PetscErrorCode ierr; 800 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 801 PetscInt nz,*rout,*cout,*diag = a->diag; 802 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 803 804 PetscFunctionBegin; 805 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 806 807 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 808 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 809 tmp = a->solve_work; 810 811 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 812 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 813 814 /* copy the b into temp work space according to permutation */ 815 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 816 817 /* forward solve the U^T */ 818 for (i=0; i<n; i++) { 819 v = aa + diag[i] ; 820 vi = aj + diag[i] + 1; 821 nz = ai[i+1] - diag[i] - 1; 822 tmp[i] *= *v++; 823 while (nz--) { 824 tmp[*vi++ ] -= (*v++)*tmp[i]; 825 } 826 } 827 828 /* backward solve the L^T */ 829 for (i=n-1; i>=0; i--){ 830 v = aa + diag[i] - 1 ; 831 vi = aj + diag[i] - 1 ; 832 nz = diag[i] - ai[i]; 833 while (nz--) { 834 tmp[*vi-- ] -= (*v--)*tmp[i]; 835 } 836 } 837 838 /* copy tmp into x according to permutation */ 839 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 840 841 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 842 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 843 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 844 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 845 846 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 847 PetscFunctionReturn(0); 848 } 849 /* ----------------------------------------------------------------*/ 850 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 851 852 #undef __FUNCT__ 853 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 854 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 855 { 856 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 857 IS isicol; 858 PetscErrorCode ierr; 859 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 860 PetscInt *bi,*cols,nnz,*cols_lvl; 861 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 862 PetscInt i,levels,diagonal_fill; 863 PetscTruth col_identity,row_identity; 864 PetscReal f; 865 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 866 PetscBT lnkbt; 867 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 868 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 869 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 870 871 PetscFunctionBegin; 872 f = info->fill; 873 levels = (PetscInt)info->levels; 874 diagonal_fill = (PetscInt)info->diagonal_fill; 875 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 876 877 /* special case that simply copies fill pattern */ 878 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 879 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 880 if (!levels && row_identity && col_identity) { 881 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 882 (*fact)->factor = FACTOR_LU; 883 b = (Mat_SeqAIJ*)(*fact)->data; 884 if (!b->diag) { 885 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 886 } 887 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 888 b->row = isrow; 889 b->col = iscol; 890 b->icol = isicol; 891 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 892 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 893 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 894 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 895 PetscFunctionReturn(0); 896 } 897 898 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 899 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 900 901 /* get new row pointers */ 902 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 903 bi[0] = 0; 904 /* bdiag is location of diagonal in factor */ 905 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 906 bdiag[0] = 0; 907 908 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 909 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 910 911 /* create a linked list for storing column indices of the active row */ 912 nlnk = n + 1; 913 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 914 915 /* initial FreeSpace size is f*(ai[n]+1) */ 916 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 917 current_space = free_space; 918 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 919 current_space_lvl = free_space_lvl; 920 921 for (i=0; i<n; i++) { 922 nzi = 0; 923 /* copy current row into linked list */ 924 nnz = ai[r[i]+1] - ai[r[i]]; 925 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 926 cols = aj + ai[r[i]]; 927 lnk[i] = -1; /* marker to indicate if diagonal exists */ 928 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 929 nzi += nlnk; 930 931 /* make sure diagonal entry is included */ 932 if (diagonal_fill && lnk[i] == -1) { 933 fm = n; 934 while (lnk[fm] < i) fm = lnk[fm]; 935 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 936 lnk[fm] = i; 937 lnk_lvl[i] = 0; 938 nzi++; dcount++; 939 } 940 941 /* add pivot rows into the active row */ 942 nzbd = 0; 943 prow = lnk[n]; 944 while (prow < i) { 945 nnz = bdiag[prow]; 946 cols = bj_ptr[prow] + nnz + 1; 947 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 948 nnz = bi[prow+1] - bi[prow] - nnz - 1; 949 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 950 nzi += nlnk; 951 prow = lnk[prow]; 952 nzbd++; 953 } 954 bdiag[i] = nzbd; 955 bi[i+1] = bi[i] + nzi; 956 957 /* if free space is not available, make more free space */ 958 if (current_space->local_remaining<nzi) { 959 nnz = nzi*(n - i); /* estimated and max additional space needed */ 960 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 961 ierr = GetMoreSpace(nnz,¤t_space_lvl);CHKERRQ(ierr); 962 reallocs++; 963 } 964 965 /* copy data into free_space and free_space_lvl, then initialize lnk */ 966 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 967 bj_ptr[i] = current_space->array; 968 bjlvl_ptr[i] = current_space_lvl->array; 969 970 /* make sure the active row i has diagonal entry */ 971 if (*(bj_ptr[i]+bdiag[i]) != i) { 972 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 973 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 974 } 975 976 current_space->array += nzi; 977 current_space->local_used += nzi; 978 current_space->local_remaining -= nzi; 979 current_space_lvl->array += nzi; 980 current_space_lvl->local_used += nzi; 981 current_space_lvl->local_remaining -= nzi; 982 } 983 984 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 985 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 986 987 /* destroy list of free space and other temporary arrays */ 988 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 989 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 990 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 991 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 992 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 993 994 { 995 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 996 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 997 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 998 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 999 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1000 if (diagonal_fill) { 1001 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1002 } 1003 } 1004 1005 /* put together the new matrix */ 1006 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1007 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1008 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1009 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1010 b = (Mat_SeqAIJ*)(*fact)->data; 1011 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1012 b->singlemalloc = PETSC_FALSE; 1013 len = (bi[n] )*sizeof(PetscScalar); 1014 /* the next line frees the default space generated by the Create() */ 1015 ierr = PetscFree(b->a);CHKERRQ(ierr); 1016 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1017 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1018 b->j = bj; 1019 b->i = bi; 1020 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1021 b->diag = bdiag; 1022 b->ilen = 0; 1023 b->imax = 0; 1024 b->row = isrow; 1025 b->col = iscol; 1026 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1027 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1028 b->icol = isicol; 1029 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1030 /* In b structure: Free imax, ilen, old a, old j. 1031 Allocate bdiag, solve_work, new a, new j */ 1032 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1033 b->maxnz = b->nz = bi[n] ; 1034 (*fact)->factor = FACTOR_LU; 1035 (*fact)->info.factor_mallocs = reallocs; 1036 (*fact)->info.fill_ratio_given = f; 1037 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1038 1039 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1040 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1041 1042 PetscFunctionReturn(0); 1043 } 1044 1045 #include "src/mat/impls/sbaij/seq/sbaij.h" 1046 #undef __FUNCT__ 1047 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1048 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1049 { 1050 Mat C = *B; 1051 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1052 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1053 IS ip=b->row; 1054 PetscErrorCode ierr; 1055 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1056 PetscInt *ai=a->i,*aj=a->j; 1057 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1058 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1059 PetscReal zeropivot,rs,shiftnz; 1060 PetscTruth shiftpd; 1061 ChShift_Ctx sctx; 1062 PetscInt newshift; 1063 1064 PetscFunctionBegin; 1065 shiftnz = info->shiftnz; 1066 shiftpd = info->shiftpd; 1067 zeropivot = info->zeropivot; 1068 1069 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1070 1071 /* initialization */ 1072 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1073 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1074 jl = il + mbs; 1075 rtmp = (MatScalar*)(jl + mbs); 1076 1077 sctx.shift_amount = 0; 1078 sctx.nshift = 0; 1079 do { 1080 sctx.chshift = PETSC_FALSE; 1081 for (i=0; i<mbs; i++) { 1082 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1083 } 1084 1085 for (k = 0; k<mbs; k++){ 1086 bval = ba + bi[k]; 1087 /* initialize k-th row by the perm[k]-th row of A */ 1088 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1089 for (j = jmin; j < jmax; j++){ 1090 col = rip[aj[j]]; 1091 if (col >= k){ /* only take upper triangular entry */ 1092 rtmp[col] = aa[j]; 1093 *bval++ = 0.0; /* for in-place factorization */ 1094 } 1095 } 1096 /* shift the diagonal of the matrix */ 1097 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1098 1099 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1100 dk = rtmp[k]; 1101 i = jl[k]; /* first row to be added to k_th row */ 1102 1103 while (i < k){ 1104 nexti = jl[i]; /* next row to be added to k_th row */ 1105 1106 /* compute multiplier, update diag(k) and U(i,k) */ 1107 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1108 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1109 dk += uikdi*ba[ili]; 1110 ba[ili] = uikdi; /* -U(i,k) */ 1111 1112 /* add multiple of row i to k-th row */ 1113 jmin = ili + 1; jmax = bi[i+1]; 1114 if (jmin < jmax){ 1115 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1116 /* update il and jl for row i */ 1117 il[i] = jmin; 1118 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1119 } 1120 i = nexti; 1121 } 1122 1123 /* shift the diagonals when zero pivot is detected */ 1124 /* compute rs=sum of abs(off-diagonal) */ 1125 rs = 0.0; 1126 jmin = bi[k]+1; 1127 nz = bi[k+1] - jmin; 1128 if (nz){ 1129 bcol = bj + jmin; 1130 while (nz--){ 1131 rs += PetscAbsScalar(rtmp[*bcol]); 1132 bcol++; 1133 } 1134 } 1135 1136 sctx.rs = rs; 1137 sctx.pv = dk; 1138 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1139 if (newshift == 1){ 1140 break; /* sctx.shift_amount is updated */ 1141 } else if (newshift == -1){ 1142 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1143 } 1144 1145 /* copy data into U(k,:) */ 1146 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1147 jmin = bi[k]+1; jmax = bi[k+1]; 1148 if (jmin < jmax) { 1149 for (j=jmin; j<jmax; j++){ 1150 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1151 } 1152 /* add the k-th row into il and jl */ 1153 il[k] = jmin; 1154 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1155 } 1156 } 1157 } while (sctx.chshift); 1158 ierr = PetscFree(il);CHKERRQ(ierr); 1159 1160 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1161 C->factor = FACTOR_CHOLESKY; 1162 C->assembled = PETSC_TRUE; 1163 C->preallocated = PETSC_TRUE; 1164 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1165 if (sctx.nshift){ 1166 if (shiftnz) { 1167 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1168 } else if (shiftpd) { 1169 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1170 } 1171 } 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1177 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1178 { 1179 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1180 Mat_SeqSBAIJ *b; 1181 Mat B; 1182 PetscErrorCode ierr; 1183 PetscTruth perm_identity; 1184 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1185 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1186 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1187 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1188 PetscReal fill=info->fill,levels=info->levels; 1189 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1190 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1191 PetscBT lnkbt; 1192 1193 PetscFunctionBegin; 1194 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1195 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1196 1197 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1198 ui[0] = 0; 1199 1200 /* special case that simply copies fill pattern */ 1201 if (!levels && perm_identity) { 1202 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1203 for (i=0; i<am; i++) { 1204 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1205 } 1206 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1207 cols = uj; 1208 for (i=0; i<am; i++) { 1209 aj = a->j + a->diag[i]; 1210 ncols = ui[i+1] - ui[i]; 1211 for (j=0; j<ncols; j++) *cols++ = *aj++; 1212 } 1213 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1214 /* initialization */ 1215 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1216 1217 /* jl: linked list for storing indices of the pivot rows 1218 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1219 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1220 il = jl + am; 1221 uj_ptr = (PetscInt**)(il + am); 1222 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1223 for (i=0; i<am; i++){ 1224 jl[i] = am; il[i] = 0; 1225 } 1226 1227 /* create and initialize a linked list for storing column indices of the active row k */ 1228 nlnk = am + 1; 1229 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1230 1231 /* initial FreeSpace size is fill*(ai[am]+1) */ 1232 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1233 current_space = free_space; 1234 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1235 current_space_lvl = free_space_lvl; 1236 1237 for (k=0; k<am; k++){ /* for each active row k */ 1238 /* initialize lnk by the column indices of row rip[k] of A */ 1239 nzk = 0; 1240 ncols = ai[rip[k]+1] - ai[rip[k]]; 1241 ncols_upper = 0; 1242 for (j=0; j<ncols; j++){ 1243 i = *(aj + ai[rip[k]] + j); 1244 if (rip[i] >= k){ /* only take upper triangular entry */ 1245 ajtmp[ncols_upper] = i; 1246 ncols_upper++; 1247 } 1248 } 1249 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1250 nzk += nlnk; 1251 1252 /* update lnk by computing fill-in for each pivot row to be merged in */ 1253 prow = jl[k]; /* 1st pivot row */ 1254 1255 while (prow < k){ 1256 nextprow = jl[prow]; 1257 1258 /* merge prow into k-th row */ 1259 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1260 jmax = ui[prow+1]; 1261 ncols = jmax-jmin; 1262 i = jmin - ui[prow]; 1263 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1264 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1265 j = *(uj - 1); 1266 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1267 nzk += nlnk; 1268 1269 /* update il and jl for prow */ 1270 if (jmin < jmax){ 1271 il[prow] = jmin; 1272 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1273 } 1274 prow = nextprow; 1275 } 1276 1277 /* if free space is not available, make more free space */ 1278 if (current_space->local_remaining<nzk) { 1279 i = am - k + 1; /* num of unfactored rows */ 1280 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1281 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1282 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1283 reallocs++; 1284 } 1285 1286 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1287 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1288 1289 /* add the k-th row into il and jl */ 1290 if (nzk > 1){ 1291 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1292 jl[k] = jl[i]; jl[i] = k; 1293 il[k] = ui[k] + 1; 1294 } 1295 uj_ptr[k] = current_space->array; 1296 uj_lvl_ptr[k] = current_space_lvl->array; 1297 1298 current_space->array += nzk; 1299 current_space->local_used += nzk; 1300 current_space->local_remaining -= nzk; 1301 1302 current_space_lvl->array += nzk; 1303 current_space_lvl->local_used += nzk; 1304 current_space_lvl->local_remaining -= nzk; 1305 1306 ui[k+1] = ui[k] + nzk; 1307 } 1308 1309 if (ai[am] != 0) { 1310 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1311 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1312 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1313 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1314 } else { 1315 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1316 } 1317 1318 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1319 ierr = PetscFree(jl);CHKERRQ(ierr); 1320 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1321 1322 /* destroy list of free space and other temporary array(s) */ 1323 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1324 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1325 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1326 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1327 1328 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1329 1330 /* put together the new matrix in MATSEQSBAIJ format */ 1331 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1332 B = *fact; 1333 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1334 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1335 1336 b = (Mat_SeqSBAIJ*)B->data; 1337 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1338 b->singlemalloc = PETSC_FALSE; 1339 /* the next line frees the default space generated by the Create() */ 1340 ierr = PetscFree(b->a);CHKERRQ(ierr); 1341 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1342 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1343 b->j = uj; 1344 b->i = ui; 1345 b->diag = 0; 1346 b->ilen = 0; 1347 b->imax = 0; 1348 b->row = perm; 1349 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1350 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1351 b->icol = perm; 1352 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1353 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1354 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1355 b->maxnz = b->nz = ui[am]; 1356 1357 B->factor = FACTOR_CHOLESKY; 1358 B->info.factor_mallocs = reallocs; 1359 B->info.fill_ratio_given = fill; 1360 if (ai[am] != 0) { 1361 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1362 } else { 1363 B->info.fill_ratio_needed = 0.0; 1364 } 1365 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1366 if (perm_identity){ 1367 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1368 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1369 } 1370 PetscFunctionReturn(0); 1371 } 1372 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1375 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1376 { 1377 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1378 Mat_SeqSBAIJ *b; 1379 Mat B; 1380 PetscErrorCode ierr; 1381 PetscTruth perm_identity; 1382 PetscReal fill = info->fill; 1383 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1384 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1385 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1386 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1387 PetscBT lnkbt; 1388 IS iperm; 1389 1390 PetscFunctionBegin; 1391 /* check whether perm is the identity mapping */ 1392 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1393 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1394 1395 if (!perm_identity){ 1396 /* check if perm is symmetric! */ 1397 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1398 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1399 for (i=0; i<am; i++) { 1400 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1401 } 1402 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1403 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1404 } 1405 1406 /* initialization */ 1407 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1408 ui[0] = 0; 1409 1410 /* jl: linked list for storing indices of the pivot rows 1411 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1412 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1413 il = jl + am; 1414 cols = il + am; 1415 ui_ptr = (PetscInt**)(cols + am); 1416 for (i=0; i<am; i++){ 1417 jl[i] = am; il[i] = 0; 1418 } 1419 1420 /* create and initialize a linked list for storing column indices of the active row k */ 1421 nlnk = am + 1; 1422 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1423 1424 /* initial FreeSpace size is fill*(ai[am]+1) */ 1425 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1426 current_space = free_space; 1427 1428 for (k=0; k<am; k++){ /* for each active row k */ 1429 /* initialize lnk by the column indices of row rip[k] of A */ 1430 nzk = 0; 1431 ncols = ai[rip[k]+1] - ai[rip[k]]; 1432 ncols_upper = 0; 1433 for (j=0; j<ncols; j++){ 1434 i = rip[*(aj + ai[rip[k]] + j)]; 1435 if (i >= k){ /* only take upper triangular entry */ 1436 cols[ncols_upper] = i; 1437 ncols_upper++; 1438 } 1439 } 1440 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1441 nzk += nlnk; 1442 1443 /* update lnk by computing fill-in for each pivot row to be merged in */ 1444 prow = jl[k]; /* 1st pivot row */ 1445 1446 while (prow < k){ 1447 nextprow = jl[prow]; 1448 /* merge prow into k-th row */ 1449 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1450 jmax = ui[prow+1]; 1451 ncols = jmax-jmin; 1452 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1453 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1454 nzk += nlnk; 1455 1456 /* update il and jl for prow */ 1457 if (jmin < jmax){ 1458 il[prow] = jmin; 1459 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1460 } 1461 prow = nextprow; 1462 } 1463 1464 /* if free space is not available, make more free space */ 1465 if (current_space->local_remaining<nzk) { 1466 i = am - k + 1; /* num of unfactored rows */ 1467 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1468 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1469 reallocs++; 1470 } 1471 1472 /* copy data into free space, then initialize lnk */ 1473 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1474 1475 /* add the k-th row into il and jl */ 1476 if (nzk-1 > 0){ 1477 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1478 jl[k] = jl[i]; jl[i] = k; 1479 il[k] = ui[k] + 1; 1480 } 1481 ui_ptr[k] = current_space->array; 1482 current_space->array += nzk; 1483 current_space->local_used += nzk; 1484 current_space->local_remaining -= nzk; 1485 1486 ui[k+1] = ui[k] + nzk; 1487 } 1488 1489 if (ai[am] != 0) { 1490 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1491 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1492 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1493 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1494 } else { 1495 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1496 } 1497 1498 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1499 ierr = PetscFree(jl);CHKERRQ(ierr); 1500 1501 /* destroy list of free space and other temporary array(s) */ 1502 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1503 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1504 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1505 1506 /* put together the new matrix in MATSEQSBAIJ format */ 1507 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1508 B = *fact; 1509 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1510 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1511 1512 b = (Mat_SeqSBAIJ*)B->data; 1513 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1514 b->singlemalloc = PETSC_FALSE; 1515 /* the next line frees the default space generated by the Create() */ 1516 ierr = PetscFree(b->a);CHKERRQ(ierr); 1517 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1518 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1519 b->j = uj; 1520 b->i = ui; 1521 b->diag = 0; 1522 b->ilen = 0; 1523 b->imax = 0; 1524 b->row = perm; 1525 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1526 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1527 b->icol = perm; 1528 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1529 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1530 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1531 b->maxnz = b->nz = ui[am]; 1532 1533 B->factor = FACTOR_CHOLESKY; 1534 B->info.factor_mallocs = reallocs; 1535 B->info.fill_ratio_given = fill; 1536 if (ai[am] != 0) { 1537 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1538 } else { 1539 B->info.fill_ratio_needed = 0.0; 1540 } 1541 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1542 if (perm_identity){ 1543 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1544 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1545 } 1546 PetscFunctionReturn(0); 1547 } 1548