1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 9 { 10 PetscFunctionBegin; 11 12 SETERRQ(PETSC_ERR_SUP,"Code not written"); 13 #if !defined(PETSC_USE_DEBUG) 14 PetscFunctionReturn(0); 15 #endif 16 } 17 18 19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 21 22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 28 /* ------------------------------------------------------------ 29 30 This interface was contribed by Tony Caola 31 32 This routine is an interface to the pivoting drop-tolerance 33 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 34 SPARSEKIT2. 35 36 The SPARSEKIT2 routines used here are covered by the GNU 37 copyright; see the file gnu in this directory. 38 39 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 40 help in getting this routine ironed out. 41 42 The major drawback to this routine is that if info->fill is 43 not large enough it fails rather than allocating more space; 44 this can be fixed by hacking/improving the f2c version of 45 Yousef Saad's code. 46 47 ------------------------------------------------------------ 48 */ 49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 50 { 51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 52 PetscFunctionBegin; 53 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 54 You can obtain the drop tolerance routines by installing PETSc from\n\ 55 www.mcs.anl.gov/petsc\n"); 56 #else 57 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 58 IS iscolf,isicol,isirow; 59 PetscTruth reorder; 60 PetscErrorCode ierr,sierr; 61 PetscInt *c,*r,*ic,i,n = A->m; 62 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63 PetscInt *ordcol,*iwk,*iperm,*jw; 64 PetscInt jmax,lfill,job,*o_i,*o_j; 65 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66 PetscReal af; 67 68 PetscFunctionBegin; 69 70 if (info->dt == PETSC_DEFAULT) info->dt = .005; 71 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 72 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 73 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 74 lfill = (PetscInt)(info->dtcount/2.0); 75 jmax = (PetscInt)(info->fill*a->nz); 76 77 78 /* ------------------------------------------------------------ 79 If reorder=.TRUE., then the original matrix has to be 80 reordered to reflect the user selected ordering scheme, and 81 then de-reordered so it is in it's original format. 82 Because Saad's dperm() is NOT in place, we have to copy 83 the original matrix and allocate more storage. . . 84 ------------------------------------------------------------ 85 */ 86 87 /* set reorder to true if either isrow or iscol is not identity */ 88 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 89 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 90 reorder = PetscNot(reorder); 91 92 93 /* storage for ilu factor */ 94 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 95 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 97 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 for (i=old_i[0];i<old_i[n];i++) { 104 old_j[i]++; 105 } 106 for(i=0;i<n+1;i++) { 107 old_i[i]++; 108 }; 109 110 111 if (reorder) { 112 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 113 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 114 for(i=0;i<n;i++) { 115 r[i] = r[i]+1; 116 c[i] = c[i]+1; 117 } 118 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 119 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 121 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 122 for (i=0;i<n;i++) { 123 r[i] = r[i]-1; 124 c[i] = c[i]-1; 125 } 126 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 127 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 128 o_a = old_a2; 129 o_j = old_j2; 130 o_i = old_i2; 131 } else { 132 o_a = old_a; 133 o_j = old_j; 134 o_i = old_i; 135 } 136 137 /* ------------------------------------------------------------ 138 Call Saad's ilutp() routine to generate the factorization 139 ------------------------------------------------------------ 140 */ 141 142 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 144 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 145 146 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 147 if (sierr) { 148 switch (sierr) { 149 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 150 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 152 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 153 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 154 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 155 } 156 } 157 158 ierr = PetscFree(w);CHKERRQ(ierr); 159 ierr = PetscFree(jw);CHKERRQ(ierr); 160 161 /* ------------------------------------------------------------ 162 Saad's routine gives the result in Modified Sparse Row (msr) 163 Convert to Compressed Sparse Row format (csr) 164 ------------------------------------------------------------ 165 */ 166 167 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 168 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 169 170 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 171 172 ierr = PetscFree(iwk);CHKERRQ(ierr); 173 ierr = PetscFree(wk);CHKERRQ(ierr); 174 175 if (reorder) { 176 ierr = PetscFree(old_a2);CHKERRQ(ierr); 177 ierr = PetscFree(old_j2);CHKERRQ(ierr); 178 ierr = PetscFree(old_i2);CHKERRQ(ierr); 179 } else { 180 /* fix permutation of old_j that the factorization introduced */ 181 for (i=old_i[0]; i<old_i[n]; i++) { 182 old_j[i-1] = iperm[old_j[i-1]-1]; 183 } 184 } 185 186 /* get rid of the shift to indices starting at 1 */ 187 for (i=0; i<n+1; i++) { 188 old_i[i]--; 189 } 190 for (i=old_i[0];i<old_i[n];i++) { 191 old_j[i]--; 192 } 193 194 /* Make the factored matrix 0-based */ 195 for (i=0; i<n+1; i++) { 196 new_i[i]--; 197 } 198 for (i=new_i[0];i<new_i[n];i++) { 199 new_j[i]--; 200 } 201 202 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 203 /*-- permute the right-hand-side and solution vectors --*/ 204 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 205 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 206 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 207 for(i=0; i<n; i++) { 208 ordcol[i] = ic[iperm[i]-1]; 209 }; 210 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 211 ierr = ISDestroy(isicol);CHKERRQ(ierr); 212 213 ierr = PetscFree(iperm);CHKERRQ(ierr); 214 215 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 216 ierr = PetscFree(ordcol);CHKERRQ(ierr); 217 218 /*----- put together the new matrix -----*/ 219 220 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 221 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 222 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 223 (*fact)->factor = FACTOR_LU; 224 (*fact)->assembled = PETSC_TRUE; 225 226 b = (Mat_SeqAIJ*)(*fact)->data; 227 ierr = PetscFree(b->imax);CHKERRQ(ierr); 228 b->sorted = PETSC_FALSE; 229 b->singlemalloc = PETSC_FALSE; 230 /* the next line frees the default space generated by the MatCreate() */ 231 ierr = PetscFree(b->a);CHKERRQ(ierr); 232 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 247 248 /* check out for identical nodes. If found, use inode functions */ 249 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 250 251 af = ((double)b->nz)/((double)a->nz) + .001; 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 /* 262 Factorization code for AIJ format. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 267 { 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 269 IS isicol; 270 PetscErrorCode ierr; 271 PetscInt *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j; 272 PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 273 PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im; 274 PetscReal f; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 284 ainew[0] = 0; 285 /* don't know how many column pointers are needed so estimate */ 286 f = info->fill; 287 jmax = (PetscInt)(f*ai[n]+1); 288 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 289 /* fill is a linked list of nonzeros in active row */ 290 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 291 im = fill + n + 1; 292 /* idnew is location of diagonal in factor */ 293 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr); 294 idnew[0] = 0; 295 296 for (i=0; i<n; i++) { 297 /* first copy previous fill into linked list */ 298 nnz = nz = ai[r[i]+1] - ai[r[i]]; 299 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 300 ajtmp = aj + ai[r[i]]; 301 fill[n] = n; 302 while (nz--) { 303 fm = n; 304 idx = ic[*ajtmp++]; 305 do { 306 m = fm; 307 fm = fill[m]; 308 } while (fm < idx); 309 fill[m] = idx; 310 fill[idx] = fm; 311 } 312 row = fill[n]; 313 while (row < i) { 314 ajtmp = ajnew + idnew[row] + 1; 315 nzbd = 1 + idnew[row] - ainew[row]; 316 nz = im[row] - nzbd; 317 fm = row; 318 while (nz-- > 0) { 319 idx = *ajtmp++ ; 320 nzbd++; 321 if (idx == i) im[row] = nzbd; 322 do { 323 m = fm; 324 fm = fill[m]; 325 } while (fm < idx); 326 if (fm != idx) { 327 fill[m] = idx; 328 fill[idx] = fm; 329 fm = idx; 330 nnz++; 331 } 332 } 333 row = fill[row]; 334 } 335 /* copy new filled row into permanent storage */ 336 ainew[i+1] = ainew[i] + nnz; 337 if (ainew[i+1] > jmax) { 338 /* estimate how much additional space we will need */ 339 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 340 /* just double the memory each time */ 341 PetscInt maxadd = jmax; 342 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 343 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 344 jmax += maxadd; 345 346 /* allocate a longer ajnew */ 347 ierr = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 348 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr); 349 ierr = PetscFree(ajnew);CHKERRQ(ierr); 350 ajnew = ajtmp; 351 reallocs++; /* count how many times we realloc */ 352 } 353 ajtmp = ajnew + ainew[i]; 354 fm = fill[n]; 355 nzi = 0; 356 im[i] = nnz; 357 while (nnz--) { 358 if (fm < i) nzi++; 359 *ajtmp++ = fm ; 360 fm = fill[fm]; 361 } 362 idnew[i] = ainew[i] + nzi; 363 } 364 if (ai[n] != 0) { 365 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 366 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 370 } else { 371 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 372 } 373 374 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 375 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 376 377 ierr = PetscFree(fill);CHKERRQ(ierr); 378 379 /* put together the new matrix */ 380 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 381 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 382 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 383 PetscLogObjectParent(*B,isicol); 384 b = (Mat_SeqAIJ*)(*B)->data; 385 ierr = PetscFree(b->imax);CHKERRQ(ierr); 386 b->singlemalloc = PETSC_FALSE; 387 /* the next line frees the default space generated by the Create() */ 388 ierr = PetscFree(b->a);CHKERRQ(ierr); 389 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 390 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 391 b->j = ajnew; 392 b->i = ainew; 393 b->diag = idnew; 394 b->ilen = 0; 395 b->imax = 0; 396 b->row = isrow; 397 b->col = iscol; 398 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 399 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 400 b->icol = isicol; 401 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 402 /* In b structure: Free imax, ilen, old a, old j. 403 Allocate idnew, solve_work, new a, new j */ 404 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar))); 405 b->maxnz = b->nz = ainew[n] ; 406 407 (*B)->factor = FACTOR_LU; 408 (*B)->info.factor_mallocs = reallocs; 409 (*B)->info.fill_ratio_given = f; 410 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 411 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 412 413 if (ai[n] != 0) { 414 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 415 } else { 416 (*B)->info.fill_ratio_needed = 0.0; 417 } 418 PetscFunctionReturn(0); 419 } 420 /* ----------------------------------------------------------- */ 421 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 425 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 426 { 427 Mat C=*B; 428 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 429 IS isrow = b->row,isicol = b->icol; 430 PetscErrorCode ierr; 431 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 432 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 433 PetscInt *diag_offset = b->diag,diag,*pj; 434 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 435 PetscReal zeropivot,rs,d,shift_nonzero; 436 PetscReal row_shift,shift_top=0.; 437 PetscTruth shift_pd; 438 LUShift_Ctx sctx; 439 PetscInt newshift; 440 441 PetscFunctionBegin; 442 shift_nonzero = info->shiftnz; 443 shift_pd = info->shiftpd; 444 zeropivot = info->zeropivot; 445 446 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 447 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 448 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 449 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 450 rtmps = rtmp; ics = ic; 451 452 if (!a->diag) { 453 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 454 } 455 /* if both shift schemes are chosen by user, only use shift_pd */ 456 if (shift_pd && shift_nonzero) shift_nonzero = 0.0; 457 if (shift_pd) { /* set shift_top=max{row_shift} */ 458 PetscInt *aai = a->i,*ddiag = a->diag; 459 shift_top = 0; 460 for (i=0; i<n; i++) { 461 d = PetscAbsScalar((a->a)[ddiag[i]]); 462 /* calculate amt of shift needed for this row */ 463 if (d<=0) { 464 row_shift = 0; 465 } else { 466 row_shift = -2*d; 467 } 468 v = a->a+aai[i]; 469 nz = aai[i+1] - aai[i]; 470 for (j=0; j<nz; j++) 471 row_shift += PetscAbsScalar(v[j]); 472 if (row_shift>shift_top) shift_top = row_shift; 473 } 474 } 475 476 sctx.shift_amount = 0; 477 sctx.shift_top = shift_top; 478 sctx.nshift = 0; 479 sctx.nshift_max = 5; 480 sctx.shift_lo = 0.; 481 sctx.shift_hi = 1.; 482 do { 483 sctx.lushift = PETSC_FALSE; 484 for (i=0; i<n; i++){ 485 nz = bi[i+1] - bi[i]; 486 bjtmp = bj + bi[i]; 487 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 488 489 /* load in initial (unfactored row) */ 490 nz = a->i[r[i]+1] - a->i[r[i]]; 491 ajtmp = a->j + a->i[r[i]]; 492 v = a->a + a->i[r[i]]; 493 for (j=0; j<nz; j++) { 494 rtmp[ics[ajtmp[j]]] = v[j]; 495 } 496 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 497 498 row = *bjtmp++; 499 while (row < i) { 500 pc = rtmp + row; 501 if (*pc != 0.0) { 502 pv = b->a + diag_offset[row]; 503 pj = b->j + diag_offset[row] + 1; 504 multiplier = *pc / *pv++; 505 *pc = multiplier; 506 nz = bi[row+1] - diag_offset[row] - 1; 507 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 508 PetscLogFlops(2*nz); 509 } 510 row = *bjtmp++; 511 } 512 /* finished row so stick it into b->a */ 513 pv = b->a + bi[i] ; 514 pj = b->j + bi[i] ; 515 nz = bi[i+1] - bi[i]; 516 diag = diag_offset[i] - bi[i]; 517 rs = 0.0; 518 for (j=0; j<nz; j++) { 519 pv[j] = rtmps[pj[j]]; 520 if (j != diag) rs += PetscAbsScalar(pv[j]); 521 } 522 523 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 524 sctx.rs = rs; 525 sctx.pv = pv[diag]; 526 newshift = 0; 527 ierr = Mat_LUFactorCheckShift(info,&sctx,&newshift);CHKERRQ(ierr); 528 if (newshift == 1){ 529 break; /* sctx.shift_amount is updated */ 530 } else if (newshift == -1){ 531 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 532 } 533 } 534 535 if (shift_pd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 536 /* 537 * if no shift in this attempt & shifting & started shifting & can refine, 538 * then try lower shift 539 */ 540 sctx.shift_hi = info->shift_fraction; 541 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 542 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 543 sctx.lushift = PETSC_TRUE; 544 sctx.nshift++; 545 } 546 } while (sctx.lushift); 547 548 /* invert diagonal entries for simplier triangular solves */ 549 for (i=0; i<n; i++) { 550 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 551 } 552 553 ierr = PetscFree(rtmp);CHKERRQ(ierr); 554 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 555 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 556 C->factor = FACTOR_LU; 557 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 558 C->assembled = PETSC_TRUE; 559 PetscLogFlops(C->n); 560 if (sctx.nshift){ 561 if (shift_nonzero) { 562 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 563 } else if (shift_pd) { 564 b->lu_shift_fraction = info->shift_fraction; 565 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",info->shift_fraction,shift_top,sctx.nshift); 566 } 567 } 568 PetscFunctionReturn(0); 569 } 570 571 #undef __FUNCT__ 572 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 573 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 574 { 575 PetscFunctionBegin; 576 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 577 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 578 PetscFunctionReturn(0); 579 } 580 581 582 /* ----------------------------------------------------------- */ 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatLUFactor_SeqAIJ" 585 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 586 { 587 PetscErrorCode ierr; 588 Mat C; 589 590 PetscFunctionBegin; 591 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 592 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 593 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 594 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 595 PetscFunctionReturn(0); 596 } 597 /* ----------------------------------------------------------- */ 598 #undef __FUNCT__ 599 #define __FUNCT__ "MatSolve_SeqAIJ" 600 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 601 { 602 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 603 IS iscol = a->col,isrow = a->row; 604 PetscErrorCode ierr; 605 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 606 PetscInt nz,*rout,*cout; 607 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 608 609 PetscFunctionBegin; 610 if (!n) PetscFunctionReturn(0); 611 612 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 613 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 614 tmp = a->solve_work; 615 616 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 617 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 618 619 /* forward solve the lower triangular */ 620 tmp[0] = b[*r++]; 621 tmps = tmp; 622 for (i=1; i<n; i++) { 623 v = aa + ai[i] ; 624 vi = aj + ai[i] ; 625 nz = a->diag[i] - ai[i]; 626 sum = b[*r++]; 627 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 628 tmp[i] = sum; 629 } 630 631 /* backward solve the upper triangular */ 632 for (i=n-1; i>=0; i--){ 633 v = aa + a->diag[i] + 1; 634 vi = aj + a->diag[i] + 1; 635 nz = ai[i+1] - a->diag[i] - 1; 636 sum = tmp[i]; 637 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 638 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 639 } 640 641 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 642 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 643 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 644 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 645 PetscLogFlops(2*a->nz - A->n); 646 PetscFunctionReturn(0); 647 } 648 649 /* ----------------------------------------------------------- */ 650 #undef __FUNCT__ 651 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 652 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 653 { 654 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 655 PetscErrorCode ierr; 656 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 657 PetscScalar *x,*b,*aa = a->a; 658 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 659 PetscInt adiag_i,i,*vi,nz,ai_i; 660 PetscScalar *v,sum; 661 #endif 662 663 PetscFunctionBegin; 664 if (!n) PetscFunctionReturn(0); 665 666 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 667 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 668 669 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 670 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 671 #else 672 /* forward solve the lower triangular */ 673 x[0] = b[0]; 674 for (i=1; i<n; i++) { 675 ai_i = ai[i]; 676 v = aa + ai_i; 677 vi = aj + ai_i; 678 nz = adiag[i] - ai_i; 679 sum = b[i]; 680 while (nz--) sum -= *v++ * x[*vi++]; 681 x[i] = sum; 682 } 683 684 /* backward solve the upper triangular */ 685 for (i=n-1; i>=0; i--){ 686 adiag_i = adiag[i]; 687 v = aa + adiag_i + 1; 688 vi = aj + adiag_i + 1; 689 nz = ai[i+1] - adiag_i - 1; 690 sum = x[i]; 691 while (nz--) sum -= *v++ * x[*vi++]; 692 x[i] = sum*aa[adiag_i]; 693 } 694 #endif 695 PetscLogFlops(2*a->nz - A->n); 696 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 697 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 698 PetscFunctionReturn(0); 699 } 700 701 #undef __FUNCT__ 702 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 703 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 704 { 705 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 706 IS iscol = a->col,isrow = a->row; 707 PetscErrorCode ierr; 708 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 709 PetscInt nz,*rout,*cout; 710 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 711 712 PetscFunctionBegin; 713 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 714 715 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 716 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 717 tmp = a->solve_work; 718 719 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 720 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 721 722 /* forward solve the lower triangular */ 723 tmp[0] = b[*r++]; 724 for (i=1; i<n; i++) { 725 v = aa + ai[i] ; 726 vi = aj + ai[i] ; 727 nz = a->diag[i] - ai[i]; 728 sum = b[*r++]; 729 while (nz--) sum -= *v++ * tmp[*vi++ ]; 730 tmp[i] = sum; 731 } 732 733 /* backward solve the upper triangular */ 734 for (i=n-1; i>=0; i--){ 735 v = aa + a->diag[i] + 1; 736 vi = aj + a->diag[i] + 1; 737 nz = ai[i+1] - a->diag[i] - 1; 738 sum = tmp[i]; 739 while (nz--) sum -= *v++ * tmp[*vi++ ]; 740 tmp[i] = sum*aa[a->diag[i]]; 741 x[*c--] += tmp[i]; 742 } 743 744 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 745 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 746 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 747 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 748 PetscLogFlops(2*a->nz); 749 750 PetscFunctionReturn(0); 751 } 752 /* -------------------------------------------------------------------*/ 753 #undef __FUNCT__ 754 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 755 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 756 { 757 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 758 IS iscol = a->col,isrow = a->row; 759 PetscErrorCode ierr; 760 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 761 PetscInt nz,*rout,*cout,*diag = a->diag; 762 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 763 764 PetscFunctionBegin; 765 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 766 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 767 tmp = a->solve_work; 768 769 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 770 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 771 772 /* copy the b into temp work space according to permutation */ 773 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 774 775 /* forward solve the U^T */ 776 for (i=0; i<n; i++) { 777 v = aa + diag[i] ; 778 vi = aj + diag[i] + 1; 779 nz = ai[i+1] - diag[i] - 1; 780 s1 = tmp[i]; 781 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 782 while (nz--) { 783 tmp[*vi++ ] -= (*v++)*s1; 784 } 785 tmp[i] = s1; 786 } 787 788 /* backward solve the L^T */ 789 for (i=n-1; i>=0; i--){ 790 v = aa + diag[i] - 1 ; 791 vi = aj + diag[i] - 1 ; 792 nz = diag[i] - ai[i]; 793 s1 = tmp[i]; 794 while (nz--) { 795 tmp[*vi-- ] -= (*v--)*s1; 796 } 797 } 798 799 /* copy tmp into x according to permutation */ 800 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 801 802 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 803 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 804 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 805 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 806 807 PetscLogFlops(2*a->nz-A->n); 808 PetscFunctionReturn(0); 809 } 810 811 #undef __FUNCT__ 812 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 813 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 814 { 815 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 816 IS iscol = a->col,isrow = a->row; 817 PetscErrorCode ierr; 818 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 819 PetscInt nz,*rout,*cout,*diag = a->diag; 820 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 821 822 PetscFunctionBegin; 823 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 824 825 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 826 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 827 tmp = a->solve_work; 828 829 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 830 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 831 832 /* copy the b into temp work space according to permutation */ 833 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 834 835 /* forward solve the U^T */ 836 for (i=0; i<n; i++) { 837 v = aa + diag[i] ; 838 vi = aj + diag[i] + 1; 839 nz = ai[i+1] - diag[i] - 1; 840 tmp[i] *= *v++; 841 while (nz--) { 842 tmp[*vi++ ] -= (*v++)*tmp[i]; 843 } 844 } 845 846 /* backward solve the L^T */ 847 for (i=n-1; i>=0; i--){ 848 v = aa + diag[i] - 1 ; 849 vi = aj + diag[i] - 1 ; 850 nz = diag[i] - ai[i]; 851 while (nz--) { 852 tmp[*vi-- ] -= (*v--)*tmp[i]; 853 } 854 } 855 856 /* copy tmp into x according to permutation */ 857 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 858 859 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 860 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 861 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 862 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 863 864 PetscLogFlops(2*a->nz); 865 PetscFunctionReturn(0); 866 } 867 /* ----------------------------------------------------------------*/ 868 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 869 870 #undef __FUNCT__ 871 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 872 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 873 { 874 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 875 IS isicol; 876 PetscErrorCode ierr; 877 PetscInt *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j; 878 PetscInt *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 879 PetscInt *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0; 880 PetscInt incrlev,nnz,i,levels,diagonal_fill; 881 PetscTruth col_identity,row_identity; 882 PetscReal f; 883 884 PetscFunctionBegin; 885 f = info->fill; 886 levels = (PetscInt)info->levels; 887 diagonal_fill = (PetscInt)info->diagonal_fill; 888 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 889 890 /* special case that simply copies fill pattern */ 891 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 892 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 893 if (!levels && row_identity && col_identity) { 894 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 895 (*fact)->factor = FACTOR_LU; 896 b = (Mat_SeqAIJ*)(*fact)->data; 897 if (!b->diag) { 898 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 899 } 900 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 901 b->row = isrow; 902 b->col = iscol; 903 b->icol = isicol; 904 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 905 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 906 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 907 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 912 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 913 914 /* get new row pointers */ 915 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 916 ainew[0] = 0; 917 /* don't know how many column pointers are needed so estimate */ 918 jmax = (PetscInt)(f*(ai[n]+1)); 919 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 920 /* ajfill is level of fill for each fill entry */ 921 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr); 922 /* fill is a linked list of nonzeros in active row */ 923 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 924 /* im is level for each filled value */ 925 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 926 /* dloc is location of diagonal in factor */ 927 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr); 928 dloc[0] = 0; 929 for (prow=0; prow<n; prow++) { 930 931 /* copy current row into linked list */ 932 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 933 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 934 xi = aj + ai[r[prow]]; 935 fill[n] = n; 936 fill[prow] = -1; /* marker to indicate if diagonal exists */ 937 while (nz--) { 938 fm = n; 939 idx = ic[*xi++ ]; 940 do { 941 m = fm; 942 fm = fill[m]; 943 } while (fm < idx); 944 fill[m] = idx; 945 fill[idx] = fm; 946 im[idx] = 0; 947 } 948 949 /* make sure diagonal entry is included */ 950 if (diagonal_fill && fill[prow] == -1) { 951 fm = n; 952 while (fill[fm] < prow) fm = fill[fm]; 953 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 954 fill[fm] = prow; 955 im[prow] = 0; 956 nzf++; 957 dcount++; 958 } 959 960 nzi = 0; 961 row = fill[n]; 962 while (row < prow) { 963 incrlev = im[row] + 1; 964 nz = dloc[row]; 965 xi = ajnew + ainew[row] + nz + 1; 966 flev = ajfill + ainew[row] + nz + 1; 967 nnz = ainew[row+1] - ainew[row] - nz - 1; 968 fm = row; 969 while (nnz-- > 0) { 970 idx = *xi++ ; 971 if (*flev + incrlev > levels) { 972 flev++; 973 continue; 974 } 975 do { 976 m = fm; 977 fm = fill[m]; 978 } while (fm < idx); 979 if (fm != idx) { 980 im[idx] = *flev + incrlev; 981 fill[m] = idx; 982 fill[idx] = fm; 983 fm = idx; 984 nzf++; 985 } else { 986 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 987 } 988 flev++; 989 } 990 row = fill[row]; 991 nzi++; 992 } 993 /* copy new filled row into permanent storage */ 994 ainew[prow+1] = ainew[prow] + nzf; 995 if (ainew[prow+1] > jmax) { 996 997 /* estimate how much additional space we will need */ 998 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 999 /* just double the memory each time */ 1000 /* maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1001 PetscInt maxadd = jmax; 1002 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1003 jmax += maxadd; 1004 1005 /* allocate a longer ajnew and ajfill */ 1006 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1007 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1008 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1009 ajnew = xi; 1010 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1011 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1012 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1013 ajfill = xi; 1014 reallocs++; /* count how many times we realloc */ 1015 } 1016 xi = ajnew + ainew[prow] ; 1017 flev = ajfill + ainew[prow] ; 1018 dloc[prow] = nzi; 1019 fm = fill[n]; 1020 while (nzf--) { 1021 *xi++ = fm ; 1022 *flev++ = im[fm]; 1023 fm = fill[fm]; 1024 } 1025 /* make sure row has diagonal entry */ 1026 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1027 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1028 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1029 } 1030 } 1031 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1032 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1033 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1034 ierr = PetscFree(fill);CHKERRQ(ierr); 1035 ierr = PetscFree(im);CHKERRQ(ierr); 1036 1037 { 1038 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1039 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1040 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1041 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1042 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1043 if (diagonal_fill) { 1044 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1045 } 1046 } 1047 1048 /* put together the new matrix */ 1049 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1050 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1051 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1052 PetscLogObjectParent(*fact,isicol); 1053 b = (Mat_SeqAIJ*)(*fact)->data; 1054 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1055 b->singlemalloc = PETSC_FALSE; 1056 len = (ainew[n] )*sizeof(PetscScalar); 1057 /* the next line frees the default space generated by the Create() */ 1058 ierr = PetscFree(b->a);CHKERRQ(ierr); 1059 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1060 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1061 b->j = ajnew; 1062 b->i = ainew; 1063 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1064 b->diag = dloc; 1065 b->ilen = 0; 1066 b->imax = 0; 1067 b->row = isrow; 1068 b->col = iscol; 1069 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1070 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1071 b->icol = isicol; 1072 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1073 /* In b structure: Free imax, ilen, old a, old j. 1074 Allocate dloc, solve_work, new a, new j */ 1075 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar))); 1076 b->maxnz = b->nz = ainew[n] ; 1077 (*fact)->factor = FACTOR_LU; 1078 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1079 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1080 (*fact)->info.factor_mallocs = reallocs; 1081 (*fact)->info.fill_ratio_given = f; 1082 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1083 PetscFunctionReturn(0); 1084 } 1085 1086 #include "src/mat/impls/sbaij/seq/sbaij.h" 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1089 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1090 { 1091 Mat C = *B; 1092 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1093 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1094 IS ip=b->row; 1095 PetscErrorCode ierr; 1096 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1097 PetscInt *ai=a->i,*aj=a->j; 1098 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1099 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1100 PetscReal zeropivot,shift_amount,rs,shift_nonzero; 1101 PetscTruth chshift,shift_pd; 1102 PetscInt nshift=0; 1103 1104 PetscFunctionBegin; 1105 shift_nonzero = info->shiftnz; 1106 shift_pd = info->shiftpd; 1107 zeropivot = info->zeropivot; 1108 1109 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1110 1111 /* initialization */ 1112 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1113 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1114 jl = il + mbs; 1115 rtmp = (MatScalar*)(jl + mbs); 1116 1117 shift_amount = 0; 1118 do { 1119 chshift = PETSC_FALSE; 1120 for (i=0; i<mbs; i++) { 1121 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1122 } 1123 1124 for (k = 0; k<mbs; k++){ 1125 bval = ba + bi[k]; 1126 /* initialize k-th row by the perm[k]-th row of A */ 1127 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1128 for (j = jmin; j < jmax; j++){ 1129 col = rip[aj[j]]; 1130 if (col >= k){ /* only take upper triangular entry */ 1131 rtmp[col] = aa[j]; 1132 *bval++ = 0.0; /* for in-place factorization */ 1133 } 1134 } 1135 /* shift the diagonal of the matrix */ 1136 if (nshift) rtmp[k] += shift_amount; 1137 1138 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1139 dk = rtmp[k]; 1140 i = jl[k]; /* first row to be added to k_th row */ 1141 1142 while (i < k){ 1143 nexti = jl[i]; /* next row to be added to k_th row */ 1144 1145 /* compute multiplier, update diag(k) and U(i,k) */ 1146 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1147 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1148 dk += uikdi*ba[ili]; 1149 ba[ili] = uikdi; /* -U(i,k) */ 1150 1151 /* add multiple of row i to k-th row */ 1152 jmin = ili + 1; jmax = bi[i+1]; 1153 if (jmin < jmax){ 1154 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1155 /* update il and jl for row i */ 1156 il[i] = jmin; 1157 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1158 } 1159 i = nexti; 1160 } 1161 1162 /* shift the diagonals when zero pivot is detected */ 1163 /* compute rs=sum of abs(off-diagonal) */ 1164 rs = 0.0; 1165 jmin = bi[k]+1; 1166 nz = bi[k+1] - jmin; 1167 if (nz){ 1168 bcol = bj + jmin; 1169 while (nz--){ 1170 rs += PetscAbsScalar(rtmp[*bcol++]); 1171 } 1172 } 1173 if (PetscAbsScalar(dk) <= zeropivot*rs && shift_nonzero){ 1174 /* force |diag(*B)| > zeropivot*rs */ 1175 if (!nshift){ 1176 shift_amount = shift_nonzero; 1177 } else { 1178 shift_amount *= 2.0; 1179 } 1180 chshift = PETSC_TRUE; 1181 nshift++; 1182 break; 1183 } else if (PetscRealPart(dk) <= zeropivot*rs && shift_pd){ 1184 /* calculate a shift that would make this row diagonally dominant */ 1185 shift_amount = PetscMax(rs+PetscAbs(PetscRealPart(dk)),1.1*shift_amount); 1186 chshift = PETSC_TRUE; 1187 /* Unlike in the ILU case there is no exit condition on nshift: 1188 we increase the shift until it converges. There is no guarantee that 1189 this algorithm converges faster or slower, or is better or worse 1190 than the ILU algorithm. */ 1191 nshift++; 1192 break; 1193 } else if (PetscAbsScalar(dk) <= zeropivot*rs){ 1194 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1195 } 1196 1197 /* copy data into U(k,:) */ 1198 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1199 jmin = bi[k]+1; jmax = bi[k+1]; 1200 if (jmin < jmax) { 1201 for (j=jmin; j<jmax; j++){ 1202 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1203 } 1204 /* add the k-th row into il and jl */ 1205 il[k] = jmin; 1206 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1207 } 1208 } 1209 } while (chshift); 1210 ierr = PetscFree(il);CHKERRQ(ierr); 1211 1212 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1213 C->factor = FACTOR_CHOLESKY; 1214 C->assembled = PETSC_TRUE; 1215 C->preallocated = PETSC_TRUE; 1216 PetscLogFlops(C->m); 1217 if (nshift){ 1218 if (shift_nonzero) { 1219 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_nonzero tries %D, shift_amount %g\n",nshift,shift_amount); 1220 } else if (shift_pd) { 1221 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g\n",nshift,shift_amount); 1222 } 1223 } 1224 PetscFunctionReturn(0); 1225 } 1226 1227 #include "petscbt.h" 1228 #include "src/mat/utils/freespace.h" 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1231 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1232 { 1233 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1234 Mat_SeqSBAIJ *b; 1235 Mat B; 1236 PetscErrorCode ierr; 1237 PetscTruth perm_identity; 1238 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1239 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1240 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1241 PetscInt ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1242 PetscReal fill=info->fill,levels=info->levels; 1243 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1244 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1245 PetscBT lnkbt; 1246 1247 PetscFunctionBegin; 1248 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1249 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1250 1251 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1252 ui[0] = 0; 1253 1254 /* special case that simply copies fill pattern */ 1255 if (!levels && perm_identity) { 1256 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1257 for (i=0; i<am; i++) { 1258 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1259 } 1260 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1261 cols = uj; 1262 for (i=0; i<am; i++) { 1263 aj = a->j + a->diag[i]; 1264 ncols = ui[i+1] - ui[i]; 1265 for (j=0; j<ncols; j++) *cols++ = *aj++; 1266 } 1267 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1268 /* initialization */ 1269 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 1270 1271 /* jl: linked list for storing indices of the pivot rows 1272 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1273 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1274 il = jl + am; 1275 uj_ptr = (PetscInt**)(il + am); 1276 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1277 for (i=0; i<am; i++){ 1278 jl[i] = am; il[i] = 0; 1279 } 1280 1281 /* create and initialize a linked list for storing column indices of the active row k */ 1282 nlnk = am + 1; 1283 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1284 1285 /* initial FreeSpace size is fill*(ai[am]+1) */ 1286 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1287 current_space = free_space; 1288 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1289 current_space_lvl = free_space_lvl; 1290 1291 for (k=0; k<am; k++){ /* for each active row k */ 1292 /* initialize lnk by the column indices of row rip[k] of A */ 1293 nzk = 0; 1294 ncols = ai[rip[k]+1] - ai[rip[k]]; 1295 ncols_upper = 0; 1296 cols = cols_lvl + am; 1297 for (j=0; j<ncols; j++){ 1298 i = rip[*(aj + ai[rip[k]] + j)]; 1299 if (i >= k){ /* only take upper triangular entry */ 1300 cols[ncols_upper] = i; 1301 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 1302 ncols_upper++; 1303 } 1304 } 1305 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1306 nzk += nlnk; 1307 1308 /* update lnk by computing fill-in for each pivot row to be merged in */ 1309 prow = jl[k]; /* 1st pivot row */ 1310 1311 while (prow < k){ 1312 nextprow = jl[prow]; 1313 1314 /* merge prow into k-th row */ 1315 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1316 jmax = ui[prow+1]; 1317 ncols = jmax-jmin; 1318 i = jmin - ui[prow]; 1319 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1320 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1321 ierr = PetscIncompleteLLAdd(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1322 nzk += nlnk; 1323 1324 /* update il and jl for prow */ 1325 if (jmin < jmax){ 1326 il[prow] = jmin; 1327 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1328 } 1329 prow = nextprow; 1330 } 1331 1332 /* if free space is not available, make more free space */ 1333 if (current_space->local_remaining<nzk) { 1334 i = am - k + 1; /* num of unfactored rows */ 1335 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1336 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1337 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1338 reallocs++; 1339 } 1340 1341 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1342 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1343 1344 /* add the k-th row into il and jl */ 1345 if (nzk > 1){ 1346 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1347 jl[k] = jl[i]; jl[i] = k; 1348 il[k] = ui[k] + 1; 1349 } 1350 uj_ptr[k] = current_space->array; 1351 uj_lvl_ptr[k] = current_space_lvl->array; 1352 1353 current_space->array += nzk; 1354 current_space->local_used += nzk; 1355 current_space->local_remaining -= nzk; 1356 1357 current_space_lvl->array += nzk; 1358 current_space_lvl->local_used += nzk; 1359 current_space_lvl->local_remaining -= nzk; 1360 1361 ui[k+1] = ui[k] + nzk; 1362 } 1363 1364 if (ai[am] != 0) { 1365 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1366 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1367 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1368 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1369 } else { 1370 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1371 } 1372 1373 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1374 ierr = PetscFree(jl);CHKERRQ(ierr); 1375 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1376 1377 /* destroy list of free space and other temporary array(s) */ 1378 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1379 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1380 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1381 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1382 1383 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1384 1385 /* put together the new matrix in MATSEQSBAIJ format */ 1386 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1387 B = *fact; 1388 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1389 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1390 1391 b = (Mat_SeqSBAIJ*)B->data; 1392 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1393 b->singlemalloc = PETSC_FALSE; 1394 /* the next line frees the default space generated by the Create() */ 1395 ierr = PetscFree(b->a);CHKERRQ(ierr); 1396 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1397 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1398 b->j = uj; 1399 b->i = ui; 1400 b->diag = 0; 1401 b->ilen = 0; 1402 b->imax = 0; 1403 b->row = perm; 1404 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1405 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1406 b->icol = perm; 1407 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1408 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1409 PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 1410 b->maxnz = b->nz = ui[am]; 1411 1412 B->factor = FACTOR_CHOLESKY; 1413 B->info.factor_mallocs = reallocs; 1414 B->info.fill_ratio_given = fill; 1415 if (ai[am] != 0) { 1416 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1417 } else { 1418 B->info.fill_ratio_needed = 0.0; 1419 } 1420 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1421 if (perm_identity){ 1422 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1423 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1430 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1431 { 1432 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1433 Mat_SeqSBAIJ *b; 1434 Mat B; 1435 PetscErrorCode ierr; 1436 PetscTruth perm_identity; 1437 PetscReal fill = info->fill; 1438 PetscInt *rip,*riip,i,mbs=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1439 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1440 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1441 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1442 PetscBT lnkbt; 1443 IS iperm; 1444 1445 PetscFunctionBegin; 1446 /* check whether perm is the identity mapping */ 1447 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1448 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1449 1450 if (!perm_identity){ 1451 /* check if perm is symmetric! */ 1452 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1453 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1454 for (i=0; i<mbs; i++) { 1455 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1456 } 1457 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1458 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1459 } 1460 1461 /* initialization */ 1462 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1463 ui[0] = 0; 1464 1465 /* jl: linked list for storing indices of the pivot rows 1466 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 1467 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1468 il = jl + mbs; 1469 cols = il + mbs; 1470 ui_ptr = (PetscInt**)(cols + mbs); 1471 for (i=0; i<mbs; i++){ 1472 jl[i] = mbs; il[i] = 0; 1473 } 1474 1475 /* create and initialize a linked list for storing column indices of the active row k */ 1476 nlnk = mbs + 1; 1477 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1478 1479 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 1480 ierr = GetMoreSpace((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 1481 current_space = free_space; 1482 1483 for (k=0; k<mbs; k++){ /* for each active row k */ 1484 /* initialize lnk by the column indices of row rip[k] of A */ 1485 nzk = 0; 1486 ncols = ai[rip[k]+1] - ai[rip[k]]; 1487 ncols_upper = 0; 1488 for (j=0; j<ncols; j++){ 1489 i = rip[*(aj + ai[rip[k]] + j)]; 1490 if (i >= k){ /* only take upper triangular entry */ 1491 cols[ncols_upper] = i; 1492 ncols_upper++; 1493 } 1494 } 1495 ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1496 nzk += nlnk; 1497 1498 /* update lnk by computing fill-in for each pivot row to be merged in */ 1499 prow = jl[k]; /* 1st pivot row */ 1500 1501 while (prow < k){ 1502 nextprow = jl[prow]; 1503 /* merge prow into k-th row */ 1504 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 1505 jmax = ui[prow+1]; 1506 ncols = jmax-jmin; 1507 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 1508 ierr = PetscLLAdd(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1509 nzk += nlnk; 1510 1511 /* update il and jl for prow */ 1512 if (jmin < jmax){ 1513 il[prow] = jmin; 1514 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1515 } 1516 prow = nextprow; 1517 } 1518 1519 /* if free space is not available, make more free space */ 1520 if (current_space->local_remaining<nzk) { 1521 i = mbs - k + 1; /* num of unfactored rows */ 1522 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1523 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1524 reallocs++; 1525 } 1526 1527 /* copy data into free space, then initialize lnk */ 1528 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1529 1530 /* add the k-th row into il and jl */ 1531 if (nzk-1 > 0){ 1532 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1533 jl[k] = jl[i]; jl[i] = k; 1534 il[k] = ui[k] + 1; 1535 } 1536 ui_ptr[k] = current_space->array; 1537 current_space->array += nzk; 1538 current_space->local_used += nzk; 1539 current_space->local_remaining -= nzk; 1540 1541 ui[k+1] = ui[k] + nzk; 1542 } 1543 1544 if (ai[mbs] != 0) { 1545 PetscReal af = ((PetscReal)(2*ui[mbs]-mbs))/((PetscReal)ai[mbs]); 1546 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1547 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1548 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1549 } else { 1550 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1551 } 1552 1553 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1554 ierr = PetscFree(jl);CHKERRQ(ierr); 1555 1556 /* destroy list of free space and other temporary array(s) */ 1557 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1558 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1559 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1560 1561 /* put together the new matrix in MATSEQSBAIJ format */ 1562 ierr = MatCreate(PETSC_COMM_SELF,mbs,mbs,mbs,mbs,fact);CHKERRQ(ierr); 1563 B = *fact; 1564 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1565 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1566 1567 b = (Mat_SeqSBAIJ*)B->data; 1568 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1569 b->singlemalloc = PETSC_FALSE; 1570 /* the next line frees the default space generated by the Create() */ 1571 ierr = PetscFree(b->a);CHKERRQ(ierr); 1572 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1573 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1574 b->j = uj; 1575 b->i = ui; 1576 b->diag = 0; 1577 b->ilen = 0; 1578 b->imax = 0; 1579 b->row = perm; 1580 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1581 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1582 b->icol = perm; 1583 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1584 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1585 PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar))); 1586 b->maxnz = b->nz = ui[mbs]; 1587 1588 B->factor = FACTOR_CHOLESKY; 1589 B->info.factor_mallocs = reallocs; 1590 B->info.fill_ratio_given = fill; 1591 if (ai[mbs] != 0) { 1592 B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 1593 } else { 1594 B->info.fill_ratio_needed = 0.0; 1595 } 1596 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1597 if (perm_identity){ 1598 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1599 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1600 } 1601 PetscFunctionReturn(0); 1602 } 1603