1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 #include "petscbt.h" 6 #include "src/mat/utils/freespace.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 10 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 11 { 12 PetscFunctionBegin; 13 14 SETERRQ(PETSC_ERR_SUP,"Code not written"); 15 #if !defined(PETSC_USE_DEBUG) 16 PetscFunctionReturn(0); 17 #endif 18 } 19 20 21 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 22 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 26 27 #undef __FUNCT__ 28 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 29 /* ------------------------------------------------------------ 30 31 This interface was contribed by Tony Caola 32 33 This routine is an interface to the pivoting drop-tolerance 34 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 35 SPARSEKIT2. 36 37 The SPARSEKIT2 routines used here are covered by the GNU 38 copyright; see the file gnu in this directory. 39 40 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 41 help in getting this routine ironed out. 42 43 The major drawback to this routine is that if info->fill is 44 not large enough it fails rather than allocating more space; 45 this can be fixed by hacking/improving the f2c version of 46 Yousef Saad's code. 47 48 ------------------------------------------------------------ 49 */ 50 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 51 { 52 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 53 PetscFunctionBegin; 54 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 55 You can obtain the drop tolerance routines by installing PETSc from\n\ 56 www.mcs.anl.gov/petsc\n"); 57 #else 58 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 59 IS iscolf,isicol,isirow; 60 PetscTruth reorder; 61 PetscErrorCode ierr,sierr; 62 PetscInt *c,*r,*ic,i,n = A->m; 63 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 64 PetscInt *ordcol,*iwk,*iperm,*jw; 65 PetscInt jmax,lfill,job,*o_i,*o_j; 66 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 67 PetscReal af; 68 69 PetscFunctionBegin; 70 71 if (info->dt == PETSC_DEFAULT) info->dt = .005; 72 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 73 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 74 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 75 lfill = (PetscInt)(info->dtcount/2.0); 76 jmax = (PetscInt)(info->fill*a->nz); 77 78 79 /* ------------------------------------------------------------ 80 If reorder=.TRUE., then the original matrix has to be 81 reordered to reflect the user selected ordering scheme, and 82 then de-reordered so it is in it's original format. 83 Because Saad's dperm() is NOT in place, we have to copy 84 the original matrix and allocate more storage. . . 85 ------------------------------------------------------------ 86 */ 87 88 /* set reorder to true if either isrow or iscol is not identity */ 89 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 90 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 91 reorder = PetscNot(reorder); 92 93 94 /* storage for ilu factor */ 95 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 98 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 99 100 /* ------------------------------------------------------------ 101 Make sure that everything is Fortran formatted (1-Based) 102 ------------------------------------------------------------ 103 */ 104 for (i=old_i[0];i<old_i[n];i++) { 105 old_j[i]++; 106 } 107 for(i=0;i<n+1;i++) { 108 old_i[i]++; 109 }; 110 111 112 if (reorder) { 113 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 114 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 115 for(i=0;i<n;i++) { 116 r[i] = r[i]+1; 117 c[i] = c[i]+1; 118 } 119 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 122 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 123 for (i=0;i<n;i++) { 124 r[i] = r[i]-1; 125 c[i] = c[i]-1; 126 } 127 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 128 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 148 if (sierr) { 149 switch (sierr) { 150 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 152 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 155 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 222 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 223 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 224 (*fact)->factor = FACTOR_LU; 225 (*fact)->assembled = PETSC_TRUE; 226 227 b = (Mat_SeqAIJ*)(*fact)->data; 228 ierr = PetscFree(b->imax);CHKERRQ(ierr); 229 b->sorted = PETSC_FALSE; 230 b->singlemalloc = PETSC_FALSE; 231 /* the next line frees the default space generated by the MatCreate() */ 232 ierr = PetscFree(b->a);CHKERRQ(ierr); 233 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 234 b->a = new_a; 235 b->j = new_j; 236 b->i = new_i; 237 b->ilen = 0; 238 b->imax = 0; 239 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 240 b->row = isirow; 241 b->col = iscolf; 242 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 243 b->maxnz = b->nz = new_i[n]; 244 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 245 (*fact)->info.factor_mallocs = 0; 246 247 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 248 249 af = ((double)b->nz)/((double)a->nz) + .001; 250 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 251 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 254 255 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 #undef __FUNCT__ 262 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 263 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 264 { 265 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 266 IS isicol; 267 PetscErrorCode ierr; 268 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 269 PetscInt *bi,*bj,*ajtmp; 270 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 271 PetscReal f; 272 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 273 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 274 PetscBT lnkbt; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 284 bi[0] = 0; 285 286 /* bdiag is location of diagonal in factor */ 287 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 288 bdiag[0] = 0; 289 290 /* linked list for storing column indices of the active row */ 291 nlnk = n + 1; 292 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 293 294 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 295 im = cols + n; 296 bi_ptr = (PetscInt**)(im + n); 297 298 /* initial FreeSpace size is f*(ai[n]+1) */ 299 f = info->fill; 300 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 301 current_space = free_space; 302 303 for (i=0; i<n; i++) { 304 /* copy previous fill into linked list */ 305 nzi = 0; 306 nnz = ai[r[i]+1] - ai[r[i]]; 307 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 308 ajtmp = aj + ai[r[i]]; 309 for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */ 310 ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 311 nzi += nlnk; 312 313 /* add pivot rows into linked list */ 314 row = lnk[n]; 315 while (row < i) { 316 nzbd = bdiag[row] - bi[row] + 1; 317 ajtmp = bi_ptr[row] + nzbd; 318 nnz = im[row] - nzbd; /* num of columns with row<indices<=i */ 319 im[row] = nzbd; 320 ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr); 321 nzi += nlnk; 322 im[row] += nzbd; /* update im[row]: num of cols with index<=i */ 323 324 row = lnk[row]; 325 } 326 327 bi[i+1] = bi[i] + nzi; 328 im[i] = nzi; 329 330 /* mark bdiag */ 331 nzbd = 0; 332 nnz = nzi; 333 k = lnk[n]; 334 while (nnz-- && k < i){ 335 nzbd++; 336 k = lnk[k]; 337 } 338 bdiag[i] = bi[i] + nzbd; 339 340 /* if free space is not available, make more free space */ 341 if (current_space->local_remaining<nzi) { 342 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 343 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 344 reallocs++; 345 } 346 347 /* copy data into free space, then initialize lnk */ 348 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 349 bi_ptr[i] = current_space->array; 350 current_space->array += nzi; 351 current_space->local_used += nzi; 352 current_space->local_remaining -= nzi; 353 } 354 if (ai[n] != 0) { 355 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 356 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 357 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 358 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 359 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 360 } else { 361 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 362 } 363 364 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 365 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 366 367 /* destroy list of free space and other temporary array(s) */ 368 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 369 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 370 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 371 ierr = PetscFree(cols);CHKERRQ(ierr); 372 373 /* put together the new matrix */ 374 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 375 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 376 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 377 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 378 b = (Mat_SeqAIJ*)(*B)->data; 379 ierr = PetscFree(b->imax);CHKERRQ(ierr); 380 b->singlemalloc = PETSC_FALSE; 381 /* the next line frees the default space generated by the Create() */ 382 ierr = PetscFree(b->a);CHKERRQ(ierr); 383 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 384 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 385 b->j = bj; 386 b->i = bi; 387 b->diag = bdiag; 388 b->ilen = 0; 389 b->imax = 0; 390 b->row = isrow; 391 b->col = iscol; 392 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 393 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 394 b->icol = isicol; 395 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 396 397 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 398 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 399 b->maxnz = b->nz = bi[n] ; 400 401 (*B)->factor = FACTOR_LU; 402 (*B)->info.factor_mallocs = reallocs; 403 (*B)->info.fill_ratio_given = f; 404 405 if (ai[n] != 0) { 406 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 407 } else { 408 (*B)->info.fill_ratio_needed = 0.0; 409 } 410 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 411 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 412 PetscFunctionReturn(0); 413 } 414 415 /* ----------------------------------------------------------- */ 416 #undef __FUNCT__ 417 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 418 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 419 { 420 Mat C=*B; 421 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 422 IS isrow = b->row,isicol = b->icol; 423 PetscErrorCode ierr; 424 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 425 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 426 PetscInt *diag_offset = b->diag,diag,*pj; 427 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 428 PetscReal rs,d; 429 PetscReal row_shift; 430 LUShift_Ctx sctx; 431 PetscInt newshift; 432 433 PetscFunctionBegin; 434 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 435 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 436 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 437 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 438 rtmps = rtmp; ics = ic; 439 440 if (!a->diag) { 441 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 442 } 443 /* if both shift schemes are chosen by user, only use info->shiftpd */ 444 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 445 if (info->shiftpd) { /* set sctx.shift_top=max{row_shift} */ 446 PetscInt *aai = a->i,*ddiag = a->diag; 447 sctx.shift_top = 0; 448 for (i=0; i<n; i++) { 449 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 450 d = (a->a)[ddiag[i]]; 451 row_shift = -PetscAbsScalar(d) - PetscRealPart(d); 452 v = a->a+aai[i]; 453 nz = aai[i+1] - aai[i]; 454 for (j=0; j<nz; j++) 455 row_shift += PetscAbsScalar(v[j]); 456 if (row_shift>sctx.shift_top) sctx.shift_top = row_shift; 457 } 458 if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12; 459 sctx.shift_top *= 1.1; 460 sctx.nshift_max = 5; 461 sctx.shift_lo = 0.; 462 sctx.shift_hi = 1.; 463 } 464 465 sctx.shift_amount = 0; 466 sctx.nshift = 0; 467 do { 468 sctx.lushift = PETSC_FALSE; 469 for (i=0; i<n; i++){ 470 nz = bi[i+1] - bi[i]; 471 bjtmp = bj + bi[i]; 472 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 473 474 /* load in initial (unfactored row) */ 475 nz = a->i[r[i]+1] - a->i[r[i]]; 476 ajtmp = a->j + a->i[r[i]]; 477 v = a->a + a->i[r[i]]; 478 for (j=0; j<nz; j++) { 479 rtmp[ics[ajtmp[j]]] = v[j]; 480 } 481 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 482 483 row = *bjtmp++; 484 while (row < i) { 485 pc = rtmp + row; 486 if (*pc != 0.0) { 487 pv = b->a + diag_offset[row]; 488 pj = b->j + diag_offset[row] + 1; 489 multiplier = *pc / *pv++; 490 *pc = multiplier; 491 nz = bi[row+1] - diag_offset[row] - 1; 492 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 493 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 494 } 495 row = *bjtmp++; 496 } 497 /* finished row so stick it into b->a */ 498 pv = b->a + bi[i] ; 499 pj = b->j + bi[i] ; 500 nz = bi[i+1] - bi[i]; 501 diag = diag_offset[i] - bi[i]; 502 rs = 0.0; 503 for (j=0; j<nz; j++) { 504 pv[j] = rtmps[pj[j]]; 505 if (j != diag) rs += PetscAbsScalar(pv[j]); 506 } 507 508 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 509 sctx.rs = rs; 510 sctx.pv = pv[diag]; 511 ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 512 if (newshift == 1){ 513 break; /* sctx.shift_amount is updated */ 514 } else if (newshift == -1){ 515 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 516 } 517 } 518 519 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 520 /* 521 * if no shift in this attempt & shifting & started shifting & can refine, 522 * then try lower shift 523 */ 524 sctx.shift_hi = info->shift_fraction; 525 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 526 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 527 sctx.lushift = PETSC_TRUE; 528 sctx.nshift++; 529 } 530 } while (sctx.lushift); 531 532 /* invert diagonal entries for simplier triangular solves */ 533 for (i=0; i<n; i++) { 534 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 535 } 536 537 ierr = PetscFree(rtmp);CHKERRQ(ierr); 538 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 539 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 540 C->factor = FACTOR_LU; 541 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 542 C->assembled = PETSC_TRUE; 543 ierr = PetscLogFlops(C->n);CHKERRQ(ierr); 544 if (sctx.nshift){ 545 if (info->shiftnz) { 546 PetscLogInfo(0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 547 } else if (info->shiftpd) { 548 b->lu_shift_fraction = info->shift_fraction; 549 PetscLogInfo(0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top); 550 } 551 } 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 557 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 558 { 559 PetscFunctionBegin; 560 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 561 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 562 PetscFunctionReturn(0); 563 } 564 565 566 /* ----------------------------------------------------------- */ 567 #undef __FUNCT__ 568 #define __FUNCT__ "MatLUFactor_SeqAIJ" 569 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 570 { 571 PetscErrorCode ierr; 572 Mat C; 573 574 PetscFunctionBegin; 575 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 576 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 577 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 578 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 /* ----------------------------------------------------------- */ 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatSolve_SeqAIJ" 584 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 585 { 586 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 587 IS iscol = a->col,isrow = a->row; 588 PetscErrorCode ierr; 589 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 590 PetscInt nz,*rout,*cout; 591 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 592 593 PetscFunctionBegin; 594 if (!n) PetscFunctionReturn(0); 595 596 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 597 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 598 tmp = a->solve_work; 599 600 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 601 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 602 603 /* forward solve the lower triangular */ 604 tmp[0] = b[*r++]; 605 tmps = tmp; 606 for (i=1; i<n; i++) { 607 v = aa + ai[i] ; 608 vi = aj + ai[i] ; 609 nz = a->diag[i] - ai[i]; 610 sum = b[*r++]; 611 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 612 tmp[i] = sum; 613 } 614 615 /* backward solve the upper triangular */ 616 for (i=n-1; i>=0; i--){ 617 v = aa + a->diag[i] + 1; 618 vi = aj + a->diag[i] + 1; 619 nz = ai[i+1] - a->diag[i] - 1; 620 sum = tmp[i]; 621 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 622 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 623 } 624 625 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 626 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 627 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 628 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 629 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 630 PetscFunctionReturn(0); 631 } 632 633 /* ----------------------------------------------------------- */ 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 636 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 637 { 638 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 639 PetscErrorCode ierr; 640 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 641 PetscScalar *x,*b,*aa = a->a; 642 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 643 PetscInt adiag_i,i,*vi,nz,ai_i; 644 PetscScalar *v,sum; 645 #endif 646 647 PetscFunctionBegin; 648 if (!n) PetscFunctionReturn(0); 649 650 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 651 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 652 653 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 654 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 655 #else 656 /* forward solve the lower triangular */ 657 x[0] = b[0]; 658 for (i=1; i<n; i++) { 659 ai_i = ai[i]; 660 v = aa + ai_i; 661 vi = aj + ai_i; 662 nz = adiag[i] - ai_i; 663 sum = b[i]; 664 while (nz--) sum -= *v++ * x[*vi++]; 665 x[i] = sum; 666 } 667 668 /* backward solve the upper triangular */ 669 for (i=n-1; i>=0; i--){ 670 adiag_i = adiag[i]; 671 v = aa + adiag_i + 1; 672 vi = aj + adiag_i + 1; 673 nz = ai[i+1] - adiag_i - 1; 674 sum = x[i]; 675 while (nz--) sum -= *v++ * x[*vi++]; 676 x[i] = sum*aa[adiag_i]; 677 } 678 #endif 679 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 680 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 681 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 682 PetscFunctionReturn(0); 683 } 684 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 687 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 688 { 689 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 690 IS iscol = a->col,isrow = a->row; 691 PetscErrorCode ierr; 692 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 693 PetscInt nz,*rout,*cout; 694 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 695 696 PetscFunctionBegin; 697 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 698 699 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 700 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 701 tmp = a->solve_work; 702 703 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 704 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 705 706 /* forward solve the lower triangular */ 707 tmp[0] = b[*r++]; 708 for (i=1; i<n; i++) { 709 v = aa + ai[i] ; 710 vi = aj + ai[i] ; 711 nz = a->diag[i] - ai[i]; 712 sum = b[*r++]; 713 while (nz--) sum -= *v++ * tmp[*vi++ ]; 714 tmp[i] = sum; 715 } 716 717 /* backward solve the upper triangular */ 718 for (i=n-1; i>=0; i--){ 719 v = aa + a->diag[i] + 1; 720 vi = aj + a->diag[i] + 1; 721 nz = ai[i+1] - a->diag[i] - 1; 722 sum = tmp[i]; 723 while (nz--) sum -= *v++ * tmp[*vi++ ]; 724 tmp[i] = sum*aa[a->diag[i]]; 725 x[*c--] += tmp[i]; 726 } 727 728 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 729 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 730 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 731 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 732 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 733 734 PetscFunctionReturn(0); 735 } 736 /* -------------------------------------------------------------------*/ 737 #undef __FUNCT__ 738 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 739 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 740 { 741 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 742 IS iscol = a->col,isrow = a->row; 743 PetscErrorCode ierr; 744 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 745 PetscInt nz,*rout,*cout,*diag = a->diag; 746 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 747 748 PetscFunctionBegin; 749 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 750 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 751 tmp = a->solve_work; 752 753 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 754 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 755 756 /* copy the b into temp work space according to permutation */ 757 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 758 759 /* forward solve the U^T */ 760 for (i=0; i<n; i++) { 761 v = aa + diag[i] ; 762 vi = aj + diag[i] + 1; 763 nz = ai[i+1] - diag[i] - 1; 764 s1 = tmp[i]; 765 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 766 while (nz--) { 767 tmp[*vi++ ] -= (*v++)*s1; 768 } 769 tmp[i] = s1; 770 } 771 772 /* backward solve the L^T */ 773 for (i=n-1; i>=0; i--){ 774 v = aa + diag[i] - 1 ; 775 vi = aj + diag[i] - 1 ; 776 nz = diag[i] - ai[i]; 777 s1 = tmp[i]; 778 while (nz--) { 779 tmp[*vi-- ] -= (*v--)*s1; 780 } 781 } 782 783 /* copy tmp into x according to permutation */ 784 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 785 786 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 787 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 788 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 789 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 790 791 ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr); 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 797 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 798 { 799 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 800 IS iscol = a->col,isrow = a->row; 801 PetscErrorCode ierr; 802 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 803 PetscInt nz,*rout,*cout,*diag = a->diag; 804 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 805 806 PetscFunctionBegin; 807 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 808 809 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 810 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 811 tmp = a->solve_work; 812 813 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 814 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 815 816 /* copy the b into temp work space according to permutation */ 817 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 818 819 /* forward solve the U^T */ 820 for (i=0; i<n; i++) { 821 v = aa + diag[i] ; 822 vi = aj + diag[i] + 1; 823 nz = ai[i+1] - diag[i] - 1; 824 tmp[i] *= *v++; 825 while (nz--) { 826 tmp[*vi++ ] -= (*v++)*tmp[i]; 827 } 828 } 829 830 /* backward solve the L^T */ 831 for (i=n-1; i>=0; i--){ 832 v = aa + diag[i] - 1 ; 833 vi = aj + diag[i] - 1 ; 834 nz = diag[i] - ai[i]; 835 while (nz--) { 836 tmp[*vi-- ] -= (*v--)*tmp[i]; 837 } 838 } 839 840 /* copy tmp into x according to permutation */ 841 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 842 843 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 844 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 845 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 846 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 847 848 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 849 PetscFunctionReturn(0); 850 } 851 /* ----------------------------------------------------------------*/ 852 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 856 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 857 { 858 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 859 IS isicol; 860 PetscErrorCode ierr; 861 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 862 PetscInt *bi,*cols,nnz,*cols_lvl; 863 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 864 PetscInt i,levels,diagonal_fill; 865 PetscTruth col_identity,row_identity; 866 PetscReal f; 867 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 868 PetscBT lnkbt; 869 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 870 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 871 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 872 873 PetscFunctionBegin; 874 f = info->fill; 875 levels = (PetscInt)info->levels; 876 diagonal_fill = (PetscInt)info->diagonal_fill; 877 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 878 879 /* special case that simply copies fill pattern */ 880 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 881 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 882 if (!levels && row_identity && col_identity) { 883 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 884 (*fact)->factor = FACTOR_LU; 885 b = (Mat_SeqAIJ*)(*fact)->data; 886 if (!b->diag) { 887 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 888 } 889 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 890 b->row = isrow; 891 b->col = iscol; 892 b->icol = isicol; 893 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 894 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 895 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 896 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 897 PetscFunctionReturn(0); 898 } 899 900 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 901 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 902 903 /* get new row pointers */ 904 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 905 bi[0] = 0; 906 /* bdiag is location of diagonal in factor */ 907 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 908 bdiag[0] = 0; 909 910 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 911 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 912 913 /* create a linked list for storing column indices of the active row */ 914 nlnk = n + 1; 915 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 916 917 /* initial FreeSpace size is f*(ai[n]+1) */ 918 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 919 current_space = free_space; 920 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 921 current_space_lvl = free_space_lvl; 922 923 for (i=0; i<n; i++) { 924 nzi = 0; 925 /* copy current row into linked list */ 926 nnz = ai[r[i]+1] - ai[r[i]]; 927 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 928 cols = aj + ai[r[i]]; 929 lnk[i] = -1; /* marker to indicate if diagonal exists */ 930 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 931 nzi += nlnk; 932 933 /* make sure diagonal entry is included */ 934 if (diagonal_fill && lnk[i] == -1) { 935 fm = n; 936 while (lnk[fm] < i) fm = lnk[fm]; 937 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 938 lnk[fm] = i; 939 lnk_lvl[i] = 0; 940 nzi++; dcount++; 941 } 942 943 /* add pivot rows into the active row */ 944 nzbd = 0; 945 prow = lnk[n]; 946 while (prow < i) { 947 nnz = bdiag[prow]; 948 cols = bj_ptr[prow] + nnz + 1; 949 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 950 nnz = bi[prow+1] - bi[prow] - nnz - 1; 951 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 952 nzi += nlnk; 953 prow = lnk[prow]; 954 nzbd++; 955 } 956 bdiag[i] = nzbd; 957 bi[i+1] = bi[i] + nzi; 958 959 /* if free space is not available, make more free space */ 960 if (current_space->local_remaining<nzi) { 961 nnz = nzi*(n - i); /* estimated and max additional space needed */ 962 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 963 ierr = GetMoreSpace(nnz,¤t_space_lvl);CHKERRQ(ierr); 964 reallocs++; 965 } 966 967 /* copy data into free_space and free_space_lvl, then initialize lnk */ 968 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 969 bj_ptr[i] = current_space->array; 970 bjlvl_ptr[i] = current_space_lvl->array; 971 972 /* make sure the active row i has diagonal entry */ 973 if (*(bj_ptr[i]+bdiag[i]) != i) { 974 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 975 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 976 } 977 978 current_space->array += nzi; 979 current_space->local_used += nzi; 980 current_space->local_remaining -= nzi; 981 current_space_lvl->array += nzi; 982 current_space_lvl->local_used += nzi; 983 current_space_lvl->local_remaining -= nzi; 984 } 985 986 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 987 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 988 989 /* destroy list of free space and other temporary arrays */ 990 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 991 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 992 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 993 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 994 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 995 996 { 997 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 998 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 999 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1000 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1001 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1002 if (diagonal_fill) { 1003 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1004 } 1005 } 1006 1007 /* put together the new matrix */ 1008 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1009 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1010 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1011 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1012 b = (Mat_SeqAIJ*)(*fact)->data; 1013 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1014 b->singlemalloc = PETSC_FALSE; 1015 len = (bi[n] )*sizeof(PetscScalar); 1016 /* the next line frees the default space generated by the Create() */ 1017 ierr = PetscFree(b->a);CHKERRQ(ierr); 1018 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1019 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1020 b->j = bj; 1021 b->i = bi; 1022 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1023 b->diag = bdiag; 1024 b->ilen = 0; 1025 b->imax = 0; 1026 b->row = isrow; 1027 b->col = iscol; 1028 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1029 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1030 b->icol = isicol; 1031 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1032 /* In b structure: Free imax, ilen, old a, old j. 1033 Allocate bdiag, solve_work, new a, new j */ 1034 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1035 b->maxnz = b->nz = bi[n] ; 1036 (*fact)->factor = FACTOR_LU; 1037 (*fact)->info.factor_mallocs = reallocs; 1038 (*fact)->info.fill_ratio_given = f; 1039 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1040 1041 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1042 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1043 1044 PetscFunctionReturn(0); 1045 } 1046 1047 #include "src/mat/impls/sbaij/seq/sbaij.h" 1048 #undef __FUNCT__ 1049 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1050 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1051 { 1052 Mat C = *B; 1053 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1054 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1055 IS ip=b->row; 1056 PetscErrorCode ierr; 1057 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1058 PetscInt *ai=a->i,*aj=a->j; 1059 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1060 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1061 PetscReal zeropivot,rs,shiftnz; 1062 PetscTruth shiftpd; 1063 ChShift_Ctx sctx; 1064 PetscInt newshift; 1065 1066 PetscFunctionBegin; 1067 shiftnz = info->shiftnz; 1068 shiftpd = info->shiftpd; 1069 zeropivot = info->zeropivot; 1070 1071 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1072 1073 /* initialization */ 1074 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1075 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1076 jl = il + mbs; 1077 rtmp = (MatScalar*)(jl + mbs); 1078 1079 sctx.shift_amount = 0; 1080 sctx.nshift = 0; 1081 do { 1082 sctx.chshift = PETSC_FALSE; 1083 for (i=0; i<mbs; i++) { 1084 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1085 } 1086 1087 for (k = 0; k<mbs; k++){ 1088 bval = ba + bi[k]; 1089 /* initialize k-th row by the perm[k]-th row of A */ 1090 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1091 for (j = jmin; j < jmax; j++){ 1092 col = rip[aj[j]]; 1093 if (col >= k){ /* only take upper triangular entry */ 1094 rtmp[col] = aa[j]; 1095 *bval++ = 0.0; /* for in-place factorization */ 1096 } 1097 } 1098 /* shift the diagonal of the matrix */ 1099 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1100 1101 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1102 dk = rtmp[k]; 1103 i = jl[k]; /* first row to be added to k_th row */ 1104 1105 while (i < k){ 1106 nexti = jl[i]; /* next row to be added to k_th row */ 1107 1108 /* compute multiplier, update diag(k) and U(i,k) */ 1109 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1110 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1111 dk += uikdi*ba[ili]; 1112 ba[ili] = uikdi; /* -U(i,k) */ 1113 1114 /* add multiple of row i to k-th row */ 1115 jmin = ili + 1; jmax = bi[i+1]; 1116 if (jmin < jmax){ 1117 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1118 /* update il and jl for row i */ 1119 il[i] = jmin; 1120 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1121 } 1122 i = nexti; 1123 } 1124 1125 /* shift the diagonals when zero pivot is detected */ 1126 /* compute rs=sum of abs(off-diagonal) */ 1127 rs = 0.0; 1128 jmin = bi[k]+1; 1129 nz = bi[k+1] - jmin; 1130 if (nz){ 1131 bcol = bj + jmin; 1132 while (nz--){ 1133 rs += PetscAbsScalar(rtmp[*bcol]); 1134 bcol++; 1135 } 1136 } 1137 1138 sctx.rs = rs; 1139 sctx.pv = dk; 1140 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1141 if (newshift == 1){ 1142 break; /* sctx.shift_amount is updated */ 1143 } else if (newshift == -1){ 1144 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1145 } 1146 1147 /* copy data into U(k,:) */ 1148 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1149 jmin = bi[k]+1; jmax = bi[k+1]; 1150 if (jmin < jmax) { 1151 for (j=jmin; j<jmax; j++){ 1152 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1153 } 1154 /* add the k-th row into il and jl */ 1155 il[k] = jmin; 1156 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1157 } 1158 } 1159 } while (sctx.chshift); 1160 ierr = PetscFree(il);CHKERRQ(ierr); 1161 1162 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1163 C->factor = FACTOR_CHOLESKY; 1164 C->assembled = PETSC_TRUE; 1165 C->preallocated = PETSC_TRUE; 1166 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1167 if (sctx.nshift){ 1168 if (shiftnz) { 1169 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1170 } else if (shiftpd) { 1171 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1172 } 1173 } 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1179 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1180 { 1181 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1182 Mat_SeqSBAIJ *b; 1183 Mat B; 1184 PetscErrorCode ierr; 1185 PetscTruth perm_identity; 1186 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1187 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1188 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1189 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1190 PetscReal fill=info->fill,levels=info->levels; 1191 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1192 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1193 PetscBT lnkbt; 1194 1195 PetscFunctionBegin; 1196 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1197 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1198 1199 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1200 ui[0] = 0; 1201 1202 /* special case that simply copies fill pattern */ 1203 if (!levels && perm_identity) { 1204 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1205 for (i=0; i<am; i++) { 1206 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1207 } 1208 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1209 cols = uj; 1210 for (i=0; i<am; i++) { 1211 aj = a->j + a->diag[i]; 1212 ncols = ui[i+1] - ui[i]; 1213 for (j=0; j<ncols; j++) *cols++ = *aj++; 1214 } 1215 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1216 /* initialization */ 1217 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1218 1219 /* jl: linked list for storing indices of the pivot rows 1220 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1221 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1222 il = jl + am; 1223 uj_ptr = (PetscInt**)(il + am); 1224 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1225 for (i=0; i<am; i++){ 1226 jl[i] = am; il[i] = 0; 1227 } 1228 1229 /* create and initialize a linked list for storing column indices of the active row k */ 1230 nlnk = am + 1; 1231 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1232 1233 /* initial FreeSpace size is fill*(ai[am]+1) */ 1234 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1235 current_space = free_space; 1236 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1237 current_space_lvl = free_space_lvl; 1238 1239 for (k=0; k<am; k++){ /* for each active row k */ 1240 /* initialize lnk by the column indices of row rip[k] of A */ 1241 nzk = 0; 1242 ncols = ai[rip[k]+1] - ai[rip[k]]; 1243 ncols_upper = 0; 1244 for (j=0; j<ncols; j++){ 1245 i = *(aj + ai[rip[k]] + j); 1246 if (rip[i] >= k){ /* only take upper triangular entry */ 1247 ajtmp[ncols_upper] = i; 1248 ncols_upper++; 1249 } 1250 } 1251 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1252 nzk += nlnk; 1253 1254 /* update lnk by computing fill-in for each pivot row to be merged in */ 1255 prow = jl[k]; /* 1st pivot row */ 1256 1257 while (prow < k){ 1258 nextprow = jl[prow]; 1259 1260 /* merge prow into k-th row */ 1261 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1262 jmax = ui[prow+1]; 1263 ncols = jmax-jmin; 1264 i = jmin - ui[prow]; 1265 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1266 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1267 j = *(uj - 1); 1268 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1269 nzk += nlnk; 1270 1271 /* update il and jl for prow */ 1272 if (jmin < jmax){ 1273 il[prow] = jmin; 1274 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1275 } 1276 prow = nextprow; 1277 } 1278 1279 /* if free space is not available, make more free space */ 1280 if (current_space->local_remaining<nzk) { 1281 i = am - k + 1; /* num of unfactored rows */ 1282 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1283 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1284 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1285 reallocs++; 1286 } 1287 1288 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1289 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1290 1291 /* add the k-th row into il and jl */ 1292 if (nzk > 1){ 1293 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1294 jl[k] = jl[i]; jl[i] = k; 1295 il[k] = ui[k] + 1; 1296 } 1297 uj_ptr[k] = current_space->array; 1298 uj_lvl_ptr[k] = current_space_lvl->array; 1299 1300 current_space->array += nzk; 1301 current_space->local_used += nzk; 1302 current_space->local_remaining -= nzk; 1303 1304 current_space_lvl->array += nzk; 1305 current_space_lvl->local_used += nzk; 1306 current_space_lvl->local_remaining -= nzk; 1307 1308 ui[k+1] = ui[k] + nzk; 1309 } 1310 1311 if (ai[am] != 0) { 1312 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1313 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1314 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1315 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1316 } else { 1317 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1318 } 1319 1320 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1321 ierr = PetscFree(jl);CHKERRQ(ierr); 1322 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1323 1324 /* destroy list of free space and other temporary array(s) */ 1325 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1326 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1327 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1328 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1329 1330 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1331 1332 /* put together the new matrix in MATSEQSBAIJ format */ 1333 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1334 B = *fact; 1335 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1336 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1337 1338 b = (Mat_SeqSBAIJ*)B->data; 1339 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1340 b->singlemalloc = PETSC_FALSE; 1341 /* the next line frees the default space generated by the Create() */ 1342 ierr = PetscFree(b->a);CHKERRQ(ierr); 1343 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1344 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1345 b->j = uj; 1346 b->i = ui; 1347 b->diag = 0; 1348 b->ilen = 0; 1349 b->imax = 0; 1350 b->row = perm; 1351 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1352 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1353 b->icol = perm; 1354 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1355 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1356 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1357 b->maxnz = b->nz = ui[am]; 1358 1359 B->factor = FACTOR_CHOLESKY; 1360 B->info.factor_mallocs = reallocs; 1361 B->info.fill_ratio_given = fill; 1362 if (ai[am] != 0) { 1363 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1364 } else { 1365 B->info.fill_ratio_needed = 0.0; 1366 } 1367 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1368 if (perm_identity){ 1369 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1370 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1371 } 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1377 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1378 { 1379 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1380 Mat_SeqSBAIJ *b; 1381 Mat B; 1382 PetscErrorCode ierr; 1383 PetscTruth perm_identity; 1384 PetscReal fill = info->fill; 1385 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1386 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1387 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1388 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1389 PetscBT lnkbt; 1390 IS iperm; 1391 1392 PetscFunctionBegin; 1393 /* check whether perm is the identity mapping */ 1394 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1395 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1396 1397 if (!perm_identity){ 1398 /* check if perm is symmetric! */ 1399 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1400 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1401 for (i=0; i<am; i++) { 1402 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1403 } 1404 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1405 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1406 } 1407 1408 /* initialization */ 1409 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1410 ui[0] = 0; 1411 1412 /* jl: linked list for storing indices of the pivot rows 1413 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1414 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1415 il = jl + am; 1416 cols = il + am; 1417 ui_ptr = (PetscInt**)(cols + am); 1418 for (i=0; i<am; i++){ 1419 jl[i] = am; il[i] = 0; 1420 } 1421 1422 /* create and initialize a linked list for storing column indices of the active row k */ 1423 nlnk = am + 1; 1424 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1425 1426 /* initial FreeSpace size is fill*(ai[am]+1) */ 1427 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1428 current_space = free_space; 1429 1430 for (k=0; k<am; k++){ /* for each active row k */ 1431 /* initialize lnk by the column indices of row rip[k] of A */ 1432 nzk = 0; 1433 ncols = ai[rip[k]+1] - ai[rip[k]]; 1434 ncols_upper = 0; 1435 for (j=0; j<ncols; j++){ 1436 i = rip[*(aj + ai[rip[k]] + j)]; 1437 if (i >= k){ /* only take upper triangular entry */ 1438 cols[ncols_upper] = i; 1439 ncols_upper++; 1440 } 1441 } 1442 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1443 nzk += nlnk; 1444 1445 /* update lnk by computing fill-in for each pivot row to be merged in */ 1446 prow = jl[k]; /* 1st pivot row */ 1447 1448 while (prow < k){ 1449 nextprow = jl[prow]; 1450 /* merge prow into k-th row */ 1451 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1452 jmax = ui[prow+1]; 1453 ncols = jmax-jmin; 1454 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1455 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1456 nzk += nlnk; 1457 1458 /* update il and jl for prow */ 1459 if (jmin < jmax){ 1460 il[prow] = jmin; 1461 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1462 } 1463 prow = nextprow; 1464 } 1465 1466 /* if free space is not available, make more free space */ 1467 if (current_space->local_remaining<nzk) { 1468 i = am - k + 1; /* num of unfactored rows */ 1469 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1470 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1471 reallocs++; 1472 } 1473 1474 /* copy data into free space, then initialize lnk */ 1475 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1476 1477 /* add the k-th row into il and jl */ 1478 if (nzk-1 > 0){ 1479 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1480 jl[k] = jl[i]; jl[i] = k; 1481 il[k] = ui[k] + 1; 1482 } 1483 ui_ptr[k] = current_space->array; 1484 current_space->array += nzk; 1485 current_space->local_used += nzk; 1486 current_space->local_remaining -= nzk; 1487 1488 ui[k+1] = ui[k] + nzk; 1489 } 1490 1491 if (ai[am] != 0) { 1492 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1493 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1494 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1495 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1496 } else { 1497 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1498 } 1499 1500 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1501 ierr = PetscFree(jl);CHKERRQ(ierr); 1502 1503 /* destroy list of free space and other temporary array(s) */ 1504 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1505 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1506 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1507 1508 /* put together the new matrix in MATSEQSBAIJ format */ 1509 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1510 B = *fact; 1511 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1512 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1513 1514 b = (Mat_SeqSBAIJ*)B->data; 1515 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1516 b->singlemalloc = PETSC_FALSE; 1517 /* the next line frees the default space generated by the Create() */ 1518 ierr = PetscFree(b->a);CHKERRQ(ierr); 1519 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1520 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1521 b->j = uj; 1522 b->i = ui; 1523 b->diag = 0; 1524 b->ilen = 0; 1525 b->imax = 0; 1526 b->row = perm; 1527 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1528 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1529 b->icol = perm; 1530 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1531 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1532 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1533 b->maxnz = b->nz = ui[am]; 1534 1535 B->factor = FACTOR_CHOLESKY; 1536 B->info.factor_mallocs = reallocs; 1537 B->info.fill_ratio_given = fill; 1538 if (ai[am] != 0) { 1539 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1540 } else { 1541 B->info.fill_ratio_needed = 0.0; 1542 } 1543 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1544 if (perm_identity){ 1545 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1546 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1547 } 1548 PetscFunctionReturn(0); 1549 } 1550