1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 9 { 10 PetscFunctionBegin; 11 12 SETERRQ(PETSC_ERR_SUP,"Code not written"); 13 #if !defined(PETSC_USE_DEBUG) 14 PetscFunctionReturn(0); 15 #endif 16 } 17 18 19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 21 22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 28 /* ------------------------------------------------------------ 29 30 This interface was contribed by Tony Caola 31 32 This routine is an interface to the pivoting drop-tolerance 33 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 34 SPARSEKIT2. 35 36 The SPARSEKIT2 routines used here are covered by the GNU 37 copyright; see the file gnu in this directory. 38 39 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 40 help in getting this routine ironed out. 41 42 The major drawback to this routine is that if info->fill is 43 not large enough it fails rather than allocating more space; 44 this can be fixed by hacking/improving the f2c version of 45 Yousef Saad's code. 46 47 ------------------------------------------------------------ 48 */ 49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 50 { 51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 52 PetscFunctionBegin; 53 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 54 You can obtain the drop tolerance routines by installing PETSc from\n\ 55 www.mcs.anl.gov/petsc\n"); 56 #else 57 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 58 IS iscolf,isicol,isirow; 59 PetscTruth reorder; 60 PetscErrorCode ierr,sierr; 61 PetscInt *c,*r,*ic,i,n = A->m; 62 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63 PetscInt *ordcol,*iwk,*iperm,*jw; 64 PetscInt jmax,lfill,job,*o_i,*o_j; 65 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66 PetscReal af; 67 68 PetscFunctionBegin; 69 70 if (info->dt == PETSC_DEFAULT) info->dt = .005; 71 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 72 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 73 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 74 lfill = (PetscInt)(info->dtcount/2.0); 75 jmax = (PetscInt)(info->fill*a->nz); 76 77 78 /* ------------------------------------------------------------ 79 If reorder=.TRUE., then the original matrix has to be 80 reordered to reflect the user selected ordering scheme, and 81 then de-reordered so it is in it's original format. 82 Because Saad's dperm() is NOT in place, we have to copy 83 the original matrix and allocate more storage. . . 84 ------------------------------------------------------------ 85 */ 86 87 /* set reorder to true if either isrow or iscol is not identity */ 88 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 89 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 90 reorder = PetscNot(reorder); 91 92 93 /* storage for ilu factor */ 94 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 95 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 97 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 for (i=old_i[0];i<old_i[n];i++) { 104 old_j[i]++; 105 } 106 for(i=0;i<n+1;i++) { 107 old_i[i]++; 108 }; 109 110 111 if (reorder) { 112 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 113 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 114 for(i=0;i<n;i++) { 115 r[i] = r[i]+1; 116 c[i] = c[i]+1; 117 } 118 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 119 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 121 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 122 for (i=0;i<n;i++) { 123 r[i] = r[i]-1; 124 c[i] = c[i]-1; 125 } 126 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 127 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 128 o_a = old_a2; 129 o_j = old_j2; 130 o_i = old_i2; 131 } else { 132 o_a = old_a; 133 o_j = old_j; 134 o_i = old_i; 135 } 136 137 /* ------------------------------------------------------------ 138 Call Saad's ilutp() routine to generate the factorization 139 ------------------------------------------------------------ 140 */ 141 142 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 144 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 145 146 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 147 if (sierr) { 148 switch (sierr) { 149 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 150 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 152 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 153 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 154 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 155 } 156 } 157 158 ierr = PetscFree(w);CHKERRQ(ierr); 159 ierr = PetscFree(jw);CHKERRQ(ierr); 160 161 /* ------------------------------------------------------------ 162 Saad's routine gives the result in Modified Sparse Row (msr) 163 Convert to Compressed Sparse Row format (csr) 164 ------------------------------------------------------------ 165 */ 166 167 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 168 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 169 170 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 171 172 ierr = PetscFree(iwk);CHKERRQ(ierr); 173 ierr = PetscFree(wk);CHKERRQ(ierr); 174 175 if (reorder) { 176 ierr = PetscFree(old_a2);CHKERRQ(ierr); 177 ierr = PetscFree(old_j2);CHKERRQ(ierr); 178 ierr = PetscFree(old_i2);CHKERRQ(ierr); 179 } else { 180 /* fix permutation of old_j that the factorization introduced */ 181 for (i=old_i[0]; i<old_i[n]; i++) { 182 old_j[i-1] = iperm[old_j[i-1]-1]; 183 } 184 } 185 186 /* get rid of the shift to indices starting at 1 */ 187 for (i=0; i<n+1; i++) { 188 old_i[i]--; 189 } 190 for (i=old_i[0];i<old_i[n];i++) { 191 old_j[i]--; 192 } 193 194 /* Make the factored matrix 0-based */ 195 for (i=0; i<n+1; i++) { 196 new_i[i]--; 197 } 198 for (i=new_i[0];i<new_i[n];i++) { 199 new_j[i]--; 200 } 201 202 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 203 /*-- permute the right-hand-side and solution vectors --*/ 204 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 205 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 206 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 207 for(i=0; i<n; i++) { 208 ordcol[i] = ic[iperm[i]-1]; 209 }; 210 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 211 ierr = ISDestroy(isicol);CHKERRQ(ierr); 212 213 ierr = PetscFree(iperm);CHKERRQ(ierr); 214 215 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 216 ierr = PetscFree(ordcol);CHKERRQ(ierr); 217 218 /*----- put together the new matrix -----*/ 219 220 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 221 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 222 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 223 (*fact)->factor = FACTOR_LU; 224 (*fact)->assembled = PETSC_TRUE; 225 226 b = (Mat_SeqAIJ*)(*fact)->data; 227 ierr = PetscFree(b->imax);CHKERRQ(ierr); 228 b->sorted = PETSC_FALSE; 229 b->singlemalloc = PETSC_FALSE; 230 /* the next line frees the default space generated by the MatCreate() */ 231 ierr = PetscFree(b->a);CHKERRQ(ierr); 232 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 247 248 /* check out for identical nodes. If found, use inode functions */ 249 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 250 251 af = ((double)b->nz)/((double)a->nz) + .001; 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 /* 262 Factorization code for AIJ format. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 267 { 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 269 IS isicol; 270 PetscErrorCode ierr; 271 PetscInt *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j; 272 PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 273 PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im; 274 PetscReal f; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 284 ainew[0] = 0; 285 /* don't know how many column pointers are needed so estimate */ 286 f = info->fill; 287 jmax = (PetscInt)(f*ai[n]+1); 288 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 289 /* fill is a linked list of nonzeros in active row */ 290 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 291 im = fill + n + 1; 292 /* idnew is location of diagonal in factor */ 293 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr); 294 idnew[0] = 0; 295 296 for (i=0; i<n; i++) { 297 /* first copy previous fill into linked list */ 298 nnz = nz = ai[r[i]+1] - ai[r[i]]; 299 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 300 ajtmp = aj + ai[r[i]]; 301 fill[n] = n; 302 while (nz--) { 303 fm = n; 304 idx = ic[*ajtmp++]; 305 do { 306 m = fm; 307 fm = fill[m]; 308 } while (fm < idx); 309 fill[m] = idx; 310 fill[idx] = fm; 311 } 312 row = fill[n]; 313 while (row < i) { 314 ajtmp = ajnew + idnew[row] + 1; 315 nzbd = 1 + idnew[row] - ainew[row]; 316 nz = im[row] - nzbd; 317 fm = row; 318 while (nz-- > 0) { 319 idx = *ajtmp++ ; 320 nzbd++; 321 if (idx == i) im[row] = nzbd; 322 do { 323 m = fm; 324 fm = fill[m]; 325 } while (fm < idx); 326 if (fm != idx) { 327 fill[m] = idx; 328 fill[idx] = fm; 329 fm = idx; 330 nnz++; 331 } 332 } 333 row = fill[row]; 334 } 335 /* copy new filled row into permanent storage */ 336 ainew[i+1] = ainew[i] + nnz; 337 if (ainew[i+1] > jmax) { 338 339 /* estimate how much additional space we will need */ 340 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 341 /* just double the memory each time */ 342 PetscInt maxadd = jmax; 343 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 344 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 345 jmax += maxadd; 346 347 /* allocate a longer ajnew */ 348 ierr = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 349 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr); 350 ierr = PetscFree(ajnew);CHKERRQ(ierr); 351 ajnew = ajtmp; 352 reallocs++; /* count how many times we realloc */ 353 } 354 ajtmp = ajnew + ainew[i]; 355 fm = fill[n]; 356 nzi = 0; 357 im[i] = nnz; 358 while (nnz--) { 359 if (fm < i) nzi++; 360 *ajtmp++ = fm ; 361 fm = fill[fm]; 362 } 363 idnew[i] = ainew[i] + nzi; 364 } 365 if (ai[n] != 0) { 366 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 370 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 371 } else { 372 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 373 } 374 375 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 376 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 377 378 ierr = PetscFree(fill);CHKERRQ(ierr); 379 380 /* put together the new matrix */ 381 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 382 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 383 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 384 PetscLogObjectParent(*B,isicol); 385 b = (Mat_SeqAIJ*)(*B)->data; 386 ierr = PetscFree(b->imax);CHKERRQ(ierr); 387 b->singlemalloc = PETSC_FALSE; 388 /* the next line frees the default space generated by the Create() */ 389 ierr = PetscFree(b->a);CHKERRQ(ierr); 390 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 391 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 392 b->j = ajnew; 393 b->i = ainew; 394 b->diag = idnew; 395 b->ilen = 0; 396 b->imax = 0; 397 b->row = isrow; 398 b->col = iscol; 399 b->lu_damping = info->damping; 400 b->lu_zeropivot = info->zeropivot; 401 b->lu_shift = info->shift; 402 b->lu_shift_fraction = info->shift_fraction; 403 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 404 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 405 b->icol = isicol; 406 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 407 /* In b structure: Free imax, ilen, old a, old j. 408 Allocate idnew, solve_work, new a, new j */ 409 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar))); 410 b->maxnz = b->nz = ainew[n] ; 411 412 (*B)->factor = FACTOR_LU; 413 (*B)->info.factor_mallocs = reallocs; 414 (*B)->info.fill_ratio_given = f; 415 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 416 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 417 418 if (ai[n] != 0) { 419 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 420 } else { 421 (*B)->info.fill_ratio_needed = 0.0; 422 } 423 PetscFunctionReturn(0); 424 } 425 /* ----------------------------------------------------------- */ 426 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 430 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 431 { 432 Mat C = *B; 433 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 434 IS isrow = b->row,isicol = b->icol; 435 PetscErrorCode ierr; 436 PetscInt *r,*ic,i,j,n = A->m,*ai = b->i,*aj = b->j; 437 PetscInt *ajtmpold,*ajtmp,nz,row,*ics; 438 PetscInt *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0; 439 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 440 PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d; 441 PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.; 442 PetscTruth damp,lushift; 443 444 PetscFunctionBegin; 445 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 446 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 447 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 448 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 449 rtmps = rtmp; ics = ic; 450 451 if (!a->diag) { 452 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 453 } 454 455 if (b->lu_shift) { /* set max shift */ 456 PetscInt *aai = a->i,*ddiag = a->diag; 457 shift_top = 0; 458 for (i=0; i<n; i++) { 459 d = PetscAbsScalar((a->a)[ddiag[i]]); 460 /* calculate amt of shift needed for this row */ 461 if (d<=0) { 462 row_shift = 0; 463 } else { 464 row_shift = -2*d; 465 } 466 v = a->a+aai[i]; 467 for (j=0; j<aai[i+1]-aai[i]; j++) 468 row_shift += PetscAbsScalar(v[j]); 469 if (row_shift>shift_top) shift_top = row_shift; 470 } 471 } 472 473 shift_fraction = 0; shift_amount = 0; 474 do { 475 damp = PETSC_FALSE; 476 lushift = PETSC_FALSE; 477 for (i=0; i<n; i++) { 478 nz = ai[i+1] - ai[i]; 479 ajtmp = aj + ai[i]; 480 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 481 482 /* load in initial (unfactored row) */ 483 nz = a->i[r[i]+1] - a->i[r[i]]; 484 ajtmpold = a->j + a->i[r[i]]; 485 v = a->a + a->i[r[i]]; 486 for (j=0; j<nz; j++) { 487 rtmp[ics[ajtmpold[j]]] = v[j]; 488 } 489 rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */ 490 491 row = *ajtmp++ ; 492 while (row < i) { 493 pc = rtmp + row; 494 if (*pc != 0.0) { 495 pv = b->a + diag_offset[row]; 496 pj = b->j + diag_offset[row] + 1; 497 multiplier = *pc / *pv++; 498 *pc = multiplier; 499 nz = ai[row+1] - diag_offset[row] - 1; 500 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 501 PetscLogFlops(2*nz); 502 } 503 row = *ajtmp++; 504 } 505 /* finished row so stick it into b->a */ 506 pv = b->a + ai[i] ; 507 pj = b->j + ai[i] ; 508 nz = ai[i+1] - ai[i]; 509 diag = diag_offset[i] - ai[i]; 510 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 511 rs = 0.0; 512 for (j=0; j<nz; j++) { 513 pv[j] = rtmps[pj[j]]; 514 if (j != diag) rs += PetscAbsScalar(pv[j]); 515 } 516 #define MAX_NSHIFT 5 517 if (PetscRealPart(pv[diag]) <= zeropivot*rs && b->lu_shift) { 518 if (nshift>MAX_NSHIFT) { 519 SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner"); 520 } else if (nshift==MAX_NSHIFT) { 521 shift_fraction = shift_hi; 522 lushift = PETSC_FALSE; 523 } else { 524 shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 525 lushift = PETSC_TRUE; 526 } 527 shift_amount = shift_fraction * shift_top; 528 nshift++; 529 break; 530 } 531 if (PetscAbsScalar(pv[diag]) <= zeropivot*rs) { 532 if (damping) { 533 if (ndamp) damping *= 2.0; 534 damp = PETSC_TRUE; 535 ndamp++; 536 break; 537 } else { 538 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 539 } 540 } 541 } 542 if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) { 543 /* 544 * if no shift in this attempt & shifting & started shifting & can refine, 545 * then try lower shift 546 */ 547 shift_hi = shift_fraction; 548 shift_fraction = (shift_hi+shift_lo)/2.; 549 shift_amount = shift_fraction * shift_top; 550 lushift = PETSC_TRUE; 551 nshift++; 552 } 553 } while (damp || lushift); 554 555 /* invert diagonal entries for simplier triangular solves */ 556 for (i=0; i<n; i++) { 557 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 558 } 559 560 ierr = PetscFree(rtmp);CHKERRQ(ierr); 561 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 562 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 563 C->factor = FACTOR_LU; 564 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 565 C->assembled = PETSC_TRUE; 566 PetscLogFlops(C->n); 567 if (ndamp) { 568 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %D damping value %g\n",ndamp,damping); 569 } 570 if (nshift) { 571 b->lu_shift_fraction = shift_fraction; 572 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift); 573 } 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 579 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 580 { 581 PetscFunctionBegin; 582 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 583 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 584 PetscFunctionReturn(0); 585 } 586 587 588 /* ----------------------------------------------------------- */ 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatLUFactor_SeqAIJ" 591 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 592 { 593 PetscErrorCode ierr; 594 Mat C; 595 596 PetscFunctionBegin; 597 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 598 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 599 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 600 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 601 PetscFunctionReturn(0); 602 } 603 /* ----------------------------------------------------------- */ 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatSolve_SeqAIJ" 606 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 607 { 608 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 609 IS iscol = a->col,isrow = a->row; 610 PetscErrorCode ierr; 611 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 612 PetscInt nz,*rout,*cout; 613 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 614 615 PetscFunctionBegin; 616 if (!n) PetscFunctionReturn(0); 617 618 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 619 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 620 tmp = a->solve_work; 621 622 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 623 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 624 625 /* forward solve the lower triangular */ 626 tmp[0] = b[*r++]; 627 tmps = tmp; 628 for (i=1; i<n; i++) { 629 v = aa + ai[i] ; 630 vi = aj + ai[i] ; 631 nz = a->diag[i] - ai[i]; 632 sum = b[*r++]; 633 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 634 tmp[i] = sum; 635 } 636 637 /* backward solve the upper triangular */ 638 for (i=n-1; i>=0; i--){ 639 v = aa + a->diag[i] + 1; 640 vi = aj + a->diag[i] + 1; 641 nz = ai[i+1] - a->diag[i] - 1; 642 sum = tmp[i]; 643 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 644 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 645 } 646 647 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 648 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 649 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 650 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 651 PetscLogFlops(2*a->nz - A->n); 652 PetscFunctionReturn(0); 653 } 654 655 /* ----------------------------------------------------------- */ 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 658 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 659 { 660 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 661 PetscErrorCode ierr; 662 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 663 PetscScalar *x,*b,*aa = a->a; 664 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 665 PetscInt adiag_i,i,*vi,nz,ai_i; 666 PetscScalar *v,sum; 667 #endif 668 669 PetscFunctionBegin; 670 if (!n) PetscFunctionReturn(0); 671 672 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 673 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 674 675 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 676 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 677 #else 678 /* forward solve the lower triangular */ 679 x[0] = b[0]; 680 for (i=1; i<n; i++) { 681 ai_i = ai[i]; 682 v = aa + ai_i; 683 vi = aj + ai_i; 684 nz = adiag[i] - ai_i; 685 sum = b[i]; 686 while (nz--) sum -= *v++ * x[*vi++]; 687 x[i] = sum; 688 } 689 690 /* backward solve the upper triangular */ 691 for (i=n-1; i>=0; i--){ 692 adiag_i = adiag[i]; 693 v = aa + adiag_i + 1; 694 vi = aj + adiag_i + 1; 695 nz = ai[i+1] - adiag_i - 1; 696 sum = x[i]; 697 while (nz--) sum -= *v++ * x[*vi++]; 698 x[i] = sum*aa[adiag_i]; 699 } 700 #endif 701 PetscLogFlops(2*a->nz - A->n); 702 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 703 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 709 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 710 { 711 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 712 IS iscol = a->col,isrow = a->row; 713 PetscErrorCode ierr; 714 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 715 PetscInt nz,*rout,*cout; 716 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 717 718 PetscFunctionBegin; 719 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 720 721 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 722 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 723 tmp = a->solve_work; 724 725 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 726 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 727 728 /* forward solve the lower triangular */ 729 tmp[0] = b[*r++]; 730 for (i=1; i<n; i++) { 731 v = aa + ai[i] ; 732 vi = aj + ai[i] ; 733 nz = a->diag[i] - ai[i]; 734 sum = b[*r++]; 735 while (nz--) sum -= *v++ * tmp[*vi++ ]; 736 tmp[i] = sum; 737 } 738 739 /* backward solve the upper triangular */ 740 for (i=n-1; i>=0; i--){ 741 v = aa + a->diag[i] + 1; 742 vi = aj + a->diag[i] + 1; 743 nz = ai[i+1] - a->diag[i] - 1; 744 sum = tmp[i]; 745 while (nz--) sum -= *v++ * tmp[*vi++ ]; 746 tmp[i] = sum*aa[a->diag[i]]; 747 x[*c--] += tmp[i]; 748 } 749 750 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 751 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 752 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 753 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 754 PetscLogFlops(2*a->nz); 755 756 PetscFunctionReturn(0); 757 } 758 /* -------------------------------------------------------------------*/ 759 #undef __FUNCT__ 760 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 761 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 762 { 763 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 764 IS iscol = a->col,isrow = a->row; 765 PetscErrorCode ierr; 766 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 767 PetscInt nz,*rout,*cout,*diag = a->diag; 768 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 769 770 PetscFunctionBegin; 771 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 772 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 773 tmp = a->solve_work; 774 775 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 776 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 777 778 /* copy the b into temp work space according to permutation */ 779 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 780 781 /* forward solve the U^T */ 782 for (i=0; i<n; i++) { 783 v = aa + diag[i] ; 784 vi = aj + diag[i] + 1; 785 nz = ai[i+1] - diag[i] - 1; 786 s1 = tmp[i]; 787 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 788 while (nz--) { 789 tmp[*vi++ ] -= (*v++)*s1; 790 } 791 tmp[i] = s1; 792 } 793 794 /* backward solve the L^T */ 795 for (i=n-1; i>=0; i--){ 796 v = aa + diag[i] - 1 ; 797 vi = aj + diag[i] - 1 ; 798 nz = diag[i] - ai[i]; 799 s1 = tmp[i]; 800 while (nz--) { 801 tmp[*vi-- ] -= (*v--)*s1; 802 } 803 } 804 805 /* copy tmp into x according to permutation */ 806 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 807 808 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 809 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 810 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 811 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 812 813 PetscLogFlops(2*a->nz-A->n); 814 PetscFunctionReturn(0); 815 } 816 817 #undef __FUNCT__ 818 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 819 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 820 { 821 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 822 IS iscol = a->col,isrow = a->row; 823 PetscErrorCode ierr; 824 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 825 PetscInt nz,*rout,*cout,*diag = a->diag; 826 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 827 828 PetscFunctionBegin; 829 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 830 831 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 832 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 833 tmp = a->solve_work; 834 835 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 836 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 837 838 /* copy the b into temp work space according to permutation */ 839 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 840 841 /* forward solve the U^T */ 842 for (i=0; i<n; i++) { 843 v = aa + diag[i] ; 844 vi = aj + diag[i] + 1; 845 nz = ai[i+1] - diag[i] - 1; 846 tmp[i] *= *v++; 847 while (nz--) { 848 tmp[*vi++ ] -= (*v++)*tmp[i]; 849 } 850 } 851 852 /* backward solve the L^T */ 853 for (i=n-1; i>=0; i--){ 854 v = aa + diag[i] - 1 ; 855 vi = aj + diag[i] - 1 ; 856 nz = diag[i] - ai[i]; 857 while (nz--) { 858 tmp[*vi-- ] -= (*v--)*tmp[i]; 859 } 860 } 861 862 /* copy tmp into x according to permutation */ 863 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 864 865 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 866 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 867 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 868 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 869 870 PetscLogFlops(2*a->nz); 871 PetscFunctionReturn(0); 872 } 873 /* ----------------------------------------------------------------*/ 874 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 875 876 #undef __FUNCT__ 877 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 878 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 879 { 880 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 881 IS isicol; 882 PetscErrorCode ierr; 883 PetscInt *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j; 884 PetscInt *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 885 PetscInt *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0; 886 PetscInt incrlev,nnz,i,levels,diagonal_fill; 887 PetscTruth col_identity,row_identity; 888 PetscReal f; 889 890 PetscFunctionBegin; 891 f = info->fill; 892 levels = (PetscInt)info->levels; 893 diagonal_fill = (PetscInt)info->diagonal_fill; 894 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 895 896 /* special case that simply copies fill pattern */ 897 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 898 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 899 if (!levels && row_identity && col_identity) { 900 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 901 (*fact)->factor = FACTOR_LU; 902 b = (Mat_SeqAIJ*)(*fact)->data; 903 if (!b->diag) { 904 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 905 } 906 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 907 b->row = isrow; 908 b->col = iscol; 909 b->icol = isicol; 910 b->lu_damping = info->damping; 911 b->lu_zeropivot = info->zeropivot; 912 b->lu_shift = info->shift; 913 b->lu_shift_fraction= info->shift_fraction; 914 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 915 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 916 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 917 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 918 PetscFunctionReturn(0); 919 } 920 921 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 922 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 923 924 /* get new row pointers */ 925 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 926 ainew[0] = 0; 927 /* don't know how many column pointers are needed so estimate */ 928 jmax = (PetscInt)(f*(ai[n]+1)); 929 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 930 /* ajfill is level of fill for each fill entry */ 931 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr); 932 /* fill is a linked list of nonzeros in active row */ 933 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 934 /* im is level for each filled value */ 935 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 936 /* dloc is location of diagonal in factor */ 937 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr); 938 dloc[0] = 0; 939 for (prow=0; prow<n; prow++) { 940 941 /* copy current row into linked list */ 942 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 943 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 944 xi = aj + ai[r[prow]] ; 945 fill[n] = n; 946 fill[prow] = -1; /* marker to indicate if diagonal exists */ 947 while (nz--) { 948 fm = n; 949 idx = ic[*xi++ ]; 950 do { 951 m = fm; 952 fm = fill[m]; 953 } while (fm < idx); 954 fill[m] = idx; 955 fill[idx] = fm; 956 im[idx] = 0; 957 } 958 959 /* make sure diagonal entry is included */ 960 if (diagonal_fill && fill[prow] == -1) { 961 fm = n; 962 while (fill[fm] < prow) fm = fill[fm]; 963 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 964 fill[fm] = prow; 965 im[prow] = 0; 966 nzf++; 967 dcount++; 968 } 969 970 nzi = 0; 971 row = fill[n]; 972 while (row < prow) { 973 incrlev = im[row] + 1; 974 nz = dloc[row]; 975 xi = ajnew + ainew[row] + nz + 1; 976 flev = ajfill + ainew[row] + nz + 1; 977 nnz = ainew[row+1] - ainew[row] - nz - 1; 978 fm = row; 979 while (nnz-- > 0) { 980 idx = *xi++ ; 981 if (*flev + incrlev > levels) { 982 flev++; 983 continue; 984 } 985 do { 986 m = fm; 987 fm = fill[m]; 988 } while (fm < idx); 989 if (fm != idx) { 990 im[idx] = *flev + incrlev; 991 fill[m] = idx; 992 fill[idx] = fm; 993 fm = idx; 994 nzf++; 995 } else { 996 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 997 } 998 flev++; 999 } 1000 row = fill[row]; 1001 nzi++; 1002 } 1003 /* copy new filled row into permanent storage */ 1004 ainew[prow+1] = ainew[prow] + nzf; 1005 if (ainew[prow+1] > jmax) { 1006 1007 /* estimate how much additional space we will need */ 1008 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1009 /* just double the memory each time */ 1010 /* maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1011 PetscInt maxadd = jmax; 1012 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1013 jmax += maxadd; 1014 1015 /* allocate a longer ajnew and ajfill */ 1016 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1017 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1018 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1019 ajnew = xi; 1020 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1021 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1022 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1023 ajfill = xi; 1024 reallocs++; /* count how many times we realloc */ 1025 } 1026 xi = ajnew + ainew[prow] ; 1027 flev = ajfill + ainew[prow] ; 1028 dloc[prow] = nzi; 1029 fm = fill[n]; 1030 while (nzf--) { 1031 *xi++ = fm ; 1032 *flev++ = im[fm]; 1033 fm = fill[fm]; 1034 } 1035 /* make sure row has diagonal entry */ 1036 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1037 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1038 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1039 } 1040 } 1041 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1042 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1043 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1044 ierr = PetscFree(fill);CHKERRQ(ierr); 1045 ierr = PetscFree(im);CHKERRQ(ierr); 1046 1047 { 1048 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1049 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1050 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1051 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1052 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1053 if (diagonal_fill) { 1054 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1055 } 1056 } 1057 1058 /* put together the new matrix */ 1059 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1060 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1061 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1062 PetscLogObjectParent(*fact,isicol); 1063 b = (Mat_SeqAIJ*)(*fact)->data; 1064 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1065 b->singlemalloc = PETSC_FALSE; 1066 len = (ainew[n] )*sizeof(PetscScalar); 1067 /* the next line frees the default space generated by the Create() */ 1068 ierr = PetscFree(b->a);CHKERRQ(ierr); 1069 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1070 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1071 b->j = ajnew; 1072 b->i = ainew; 1073 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1074 b->diag = dloc; 1075 b->ilen = 0; 1076 b->imax = 0; 1077 b->row = isrow; 1078 b->col = iscol; 1079 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1080 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1081 b->icol = isicol; 1082 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1083 /* In b structure: Free imax, ilen, old a, old j. 1084 Allocate dloc, solve_work, new a, new j */ 1085 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar))); 1086 b->maxnz = b->nz = ainew[n] ; 1087 b->lu_damping = info->damping; 1088 b->lu_shift = info->shift; 1089 b->lu_shift_fraction = info->shift_fraction; 1090 b->lu_zeropivot = info->zeropivot; 1091 (*fact)->factor = FACTOR_LU; 1092 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1093 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1094 1095 (*fact)->info.factor_mallocs = reallocs; 1096 (*fact)->info.fill_ratio_given = f; 1097 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #include "src/mat/impls/sbaij/seq/sbaij.h" 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1104 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact) 1105 { 1106 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1107 PetscErrorCode ierr,(*f)(Mat,Mat*); 1108 1109 PetscFunctionBegin; 1110 printf(" MatCholeskyFactorNumeric_SeqAIJ ...\n"); 1111 if (!a->sbaijMat){ 1112 ierr = MatConvert_SeqAIJ_SeqSBAIJ(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1113 } 1114 ierr = PetscObjectQueryFunction((PetscObject) *fact,"MatCholeskyFactorNumeric",(void (**)(void))&f);CHKERRQ(ierr); 1115 ierr = (*f)(a->sbaijMat,fact);CHKERRQ(ierr); 1116 ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 1117 a->sbaijMat = PETSC_NULL; 1118 PetscFunctionReturn(0); 1119 } 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering" 1122 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering(Mat A,Mat *fact) 1123 { 1124 Mat C = *fact; 1125 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1126 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1127 PetscErrorCode ierr; 1128 PetscInt i,j,am = A->m; 1129 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 1130 PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0; 1131 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 1132 PetscReal damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount; 1133 PetscTruth damp,chshift; 1134 PetscInt nshift=0; 1135 1136 PetscFunctionBegin; 1137 /* initialization */ 1138 /* il and jl record the first nonzero element in each row of the accessing 1139 window U(0:k, k:am-1). 1140 jl: list of rows to be added to uneliminated rows 1141 i>= k: jl(i) is the first row to be added to row i 1142 i< k: jl(i) is the row following row i in some list of rows 1143 jl(i) = am indicates the end of a list 1144 il(i): points to the first nonzero element in U(i,k:am-1) 1145 */ 1146 nz = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar); 1147 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1148 jl = il + am; 1149 rtmp = (MatScalar*)(jl + am); 1150 1151 shift_amount = 0; 1152 do { 1153 damp = PETSC_FALSE; 1154 chshift = PETSC_FALSE; 1155 for (i=0; i<am; i++) { 1156 rtmp[i] = 0.0; jl[i] = am; il[0] = 0; 1157 } 1158 1159 for (k = 0; k<am; k++){ 1160 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1161 nz = ai[k+1] - ai[k]; 1162 acol = aj + ai[k]; 1163 aval = aa + ai[k]; 1164 bval = ba + bi[k]; 1165 while (nz -- ){ 1166 if (*acol < k) { /* skip lower triangular entries */ 1167 acol++; aval++; 1168 } else { 1169 rtmp[*acol++] = *aval++; 1170 *bval++ = 0.0; /* for in-place factorization */ 1171 } 1172 } 1173 /* damp the diagonal of the matrix */ 1174 if (ndamp||nshift) rtmp[k] += damping+shift_amount; 1175 1176 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1177 dk = rtmp[k]; 1178 i = jl[k]; /* first row to be added to k_th row */ 1179 1180 while (i < k){ 1181 nexti = jl[i]; /* next row to be added to k_th row */ 1182 /* compute multiplier, update D(k) and U(i,k) */ 1183 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1184 uikdi = - ba[ili]*ba[bi[i]]; 1185 dk += uikdi*ba[ili]; 1186 ba[ili] = uikdi; /* -U(i,k) */ 1187 1188 /* add multiple of row i to k-th row ... */ 1189 jmin = ili + 1; 1190 nz = bi[i+1] - jmin; 1191 if (nz > 0){ 1192 bcol = bj + jmin; 1193 bval = ba + jmin; 1194 while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 1195 /* update il and jl for i-th row */ 1196 il[i] = jmin; 1197 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1198 } 1199 i = nexti; 1200 } 1201 1202 if (PetscRealPart(dk) < zeropivot && b->factor_shift){ 1203 /* calculate a shift that would make this row diagonally dominant */ 1204 PetscReal rs = PetscAbs(PetscRealPart(dk)); 1205 jmin = bi[k]+1; 1206 nz = bi[k+1] - jmin; 1207 if (nz){ 1208 bcol = bj + jmin; 1209 bval = ba + jmin; 1210 while (nz--){ 1211 rs += PetscAbsScalar(rtmp[*bcol++]); 1212 } 1213 } 1214 /* if this shift is less than the previous, just up the previous 1215 one by a bit */ 1216 shift_amount = PetscMax(rs,1.1*shift_amount); 1217 chshift = PETSC_TRUE; 1218 /* Unlike in the ILU case there is no exit condition on nshift: 1219 we increase the shift until it converges. There is no guarantee that 1220 this algorithm converges faster or slower, or is better or worse 1221 than the ILU algorithm. */ 1222 nshift++; 1223 break; 1224 } 1225 if (PetscRealPart(dk) < zeropivot){ 1226 if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1); 1227 if (damping > 0.0) { 1228 if (ndamp) damping *= 2.0; 1229 damp = PETSC_TRUE; 1230 ndamp++; 1231 break; 1232 } else if (PetscAbsScalar(dk) < zeropivot){ 1233 SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot); 1234 } else { 1235 PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k); 1236 } 1237 } 1238 1239 /* copy data into U(k,:) */ 1240 ba[bi[k]] = 1.0/dk; 1241 jmin = bi[k]+1; 1242 nz = bi[k+1] - jmin; 1243 if (nz){ 1244 bcol = bj + jmin; 1245 bval = ba + jmin; 1246 while (nz--){ 1247 *bval++ = rtmp[*bcol]; 1248 rtmp[*bcol++] = 0.0; 1249 } 1250 /* add k-th row into il and jl */ 1251 il[k] = jmin; 1252 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1253 } 1254 } 1255 } while (damp||chshift); 1256 ierr = PetscFree(il);CHKERRQ(ierr); 1257 1258 C->factor = FACTOR_CHOLESKY; 1259 C->assembled = PETSC_TRUE; 1260 C->preallocated = PETSC_TRUE; 1261 PetscLogFlops(C->m); 1262 if (ndamp) { 1263 PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqAIJ_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping); 1264 } 1265 if (nshift) { 1266 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering diagonal shifted %D shifts\n",nshift); 1267 } 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #include "petscbt.h" 1272 #include "src/mat/utils/freespace.h" 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1275 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1276 { 1277 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1278 Mat_SeqSBAIJ *b; 1279 Mat B; 1280 PetscErrorCode ierr; 1281 PetscTruth perm_identity; 1282 PetscInt reallocs=0,*rip,i,*ai,*aj,am=A->m,*ui; 1283 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1284 PetscInt nlnk,*lnk,*lnk_lvl,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1285 PetscReal fill=info->fill,levels=info->levels; 1286 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1287 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1288 PetscBT lnkbt; 1289 1290 PetscFunctionBegin; 1291 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1292 if (!perm_identity){ 1293 SETERRQ(PETSC_ERR_SUP,"Non-identity permutation is not supported yet"); 1294 } 1295 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1296 ai = a->i; aj = a->j; 1297 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1298 1299 /* levels = 0: simply copies fill pattern */ 1300 if (!levels) { 1301 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1302 for (i=0; i<am; i++) { 1303 ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */ 1304 } 1305 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1306 B = *fact; 1307 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1308 ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr); 1309 1310 b = (Mat_SeqSBAIJ*)B->data; 1311 uj = b->j; 1312 for (i=0; i<am; i++) { 1313 aj = a->j + a->diag[i]; 1314 for (j=0; j<ui[i]; j++){ 1315 *uj++ = *aj++; 1316 } 1317 b->ilen[i] = ui[i]; 1318 } 1319 ierr = PetscFree(ui);CHKERRQ(ierr); 1320 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1321 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1322 1323 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1324 ierr = PetscObjectComposeFunction((PetscObject)B,"MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 1325 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering; 1326 PetscFunctionReturn(0); 1327 } 1328 1329 /* initialization */ 1330 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1331 ui[0] = 0; 1332 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 1333 1334 /* jl: linked list for storing indices of the pivot rows 1335 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1336 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1337 il = jl + am; 1338 uj_ptr = (PetscInt**)(il + am); 1339 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1340 for (i=0; i<am; i++){ 1341 jl[i] = am; il[i] = 0; 1342 } 1343 1344 /* create and initialize a linked list for storing column indices of the active row k */ 1345 nlnk = am + 1; 1346 ierr = PetscLLCreate_PermutedLeveled(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1347 1348 /* initial FreeSpace size is fill*(ai[am]+1) */ 1349 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1350 current_space = free_space; 1351 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1352 current_space_lvl = free_space_lvl; 1353 1354 for (k=0; k<am; k++){ /* for each active row k */ 1355 /* initialize lnk by the column indices of row rip[k] of A */ 1356 nzk = 0; 1357 ncols = ai[rip[k]+1] - a->diag[rip[k]]; /* changes for !perm_identity */ 1358 cols = aj + a->diag[rip[k]]; 1359 for (j=0; j<ncols; j++) cols_lvl[j] = -1; /* initialize level for nonzero entries */ 1360 ierr = PetscLLAdd_PermutedLeveled(ncols,cols,levels,cols_lvl,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1361 nzk += nlnk; 1362 1363 /* update lnk by computing fill-in for each pivot row to be merged in */ 1364 prow = jl[k]; /* 1st pivot row */ 1365 1366 while (prow < k){ 1367 nextprow = jl[prow]; 1368 1369 /* merge prow into k-th row */ 1370 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1371 jmax = ui[prow+1]; 1372 ncols = jmax-jmin; 1373 i = jmin - ui[prow]; 1374 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1375 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1376 ierr = PetscLLAdd_PermutedLeveled(ncols,cols,levels,cols_lvl,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1377 nzk += nlnk; 1378 1379 /* update il and jl for prow */ 1380 if (jmin < jmax){ 1381 il[prow] = jmin; 1382 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1383 } 1384 prow = nextprow; 1385 } 1386 1387 /* if free space is not available, make more free space */ 1388 if (current_space->local_remaining<nzk) { 1389 i = am - k + 1; /* num of unfactored rows */ 1390 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1391 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1392 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1393 reallocs++; 1394 } 1395 1396 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1397 ierr = PetscLLClean_PermutedLeveled(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1398 1399 /* add the k-th row into il and jl */ 1400 if (nzk-1 > 0){ 1401 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1402 jl[k] = jl[i]; jl[i] = k; 1403 il[k] = ui[k] + 1; 1404 } 1405 uj_ptr[k] = current_space->array; 1406 uj_lvl_ptr[k] = current_space_lvl->array; 1407 1408 current_space->array += nzk; 1409 current_space->local_used += nzk; 1410 current_space->local_remaining -= nzk; 1411 1412 current_space_lvl->array += nzk; 1413 current_space_lvl->local_used += nzk; 1414 current_space_lvl->local_remaining -= nzk; 1415 1416 ui[k+1] = ui[k] + nzk; 1417 } 1418 1419 if (ai[am] != 0) { 1420 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1421 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1422 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1423 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1424 } else { 1425 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1426 } 1427 1428 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1429 ierr = PetscFree(jl);CHKERRQ(ierr); 1430 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1431 1432 /* destroy list of free space and other temporary array(s) */ 1433 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1434 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1435 ierr = PetscLLDestroy_PermutedLeveled(lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1436 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1437 1438 /* put together the new matrix in MATSEQSBAIJ format */ 1439 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1440 B = *fact; 1441 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1442 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1443 1444 b = (Mat_SeqSBAIJ*)B->data; 1445 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1446 b->singlemalloc = PETSC_FALSE; 1447 /* the next line frees the default space generated by the Create() */ 1448 ierr = PetscFree(b->a);CHKERRQ(ierr); 1449 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1450 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1451 b->j = uj; 1452 b->i = ui; 1453 b->diag = 0; 1454 b->ilen = 0; 1455 b->imax = 0; 1456 b->row = perm; 1457 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1458 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1459 b->icol = perm; 1460 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1461 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1462 PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 1463 b->maxnz = b->nz = ui[am]; 1464 1465 B->factor = FACTOR_CHOLESKY; 1466 B->info.factor_mallocs = reallocs; 1467 B->info.fill_ratio_given = fill; 1468 if (ai[am] != 0) { 1469 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1470 } else { 1471 B->info.fill_ratio_needed = 0.0; 1472 } 1473 1474 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1475 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1476 ierr = PetscObjectComposeFunction((PetscObject) B,"MatCholeskyFactorNumeric_NaturalOrdering","dummyname",(FCNVOID)B->ops->choleskyfactornumeric);CHKERRQ(ierr); 1477 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering; 1478 PetscFunctionReturn(0); 1479 } 1480 1481 #undef __FUNCT__ 1482 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1483 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1484 { 1485 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1486 Mat_SeqSBAIJ *b; 1487 PetscErrorCode ierr; 1488 PetscTruth perm_identity; 1489 PetscReal fill = info->fill; 1490 PetscInt *rip,i,am=A->m,*ai,*aj,reallocs=0,prow; 1491 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1492 PetscInt nlnk,*lnk,ncols,*cols,*uj,**uj_ptr; 1493 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1494 PetscBT lnkbt; 1495 1496 PetscFunctionBegin; 1497 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1498 if (!perm_identity){ 1499 SETERRQ(PETSC_ERR_SUP,"Non-identity permutation is not supported yet"); 1500 } 1501 1502 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1503 ai = a->i; aj = a->j; 1504 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1505 1506 /* initialization */ 1507 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1508 ui[0] = 0; 1509 1510 /* jl: linked list for storing indices of the pivot rows 1511 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1512 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1513 il = jl + am; 1514 uj_ptr = (PetscInt**)(il + am); 1515 for (i=0; i<am; i++){ 1516 jl[i] = am; il[i] = 0; 1517 } 1518 1519 /* create and initialize a linked list for storing column indices of the active row k */ 1520 nlnk = am + 1; 1521 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1522 1523 /* initial FreeSpace size is fill*(ai[am]+1) */ 1524 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1525 current_space = free_space; 1526 1527 for (k=0; k<am; k++){ /* for each active row k */ 1528 /* initialize lnk by the column indices of row rip[k] of A */ 1529 nzk = 0; 1530 ncols = ai[rip[k]+1] - a->diag[rip[k]]; /* diag would change for !perm_identity */ 1531 cols = aj + a->diag[rip[k]]; 1532 ierr = PetscLLAdd(ncols,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1533 nzk += nlnk; 1534 1535 /* update lnk by computing fill-in for each pivot row to be merged in */ 1536 prow = jl[k]; /* 1st pivot row */ 1537 1538 while (prow < k){ 1539 nextprow = jl[prow]; 1540 1541 /* merge prow into k-th row */ 1542 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1543 jmax = ui[prow+1]; 1544 ncols = jmax-jmin; 1545 cols = uj_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1546 ierr = PetscLLAdd(ncols,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1547 nzk += nlnk; 1548 1549 /* update il and jl for prow */ 1550 if (jmin < jmax){ 1551 il[prow] = jmin; 1552 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1553 } 1554 prow = nextprow; 1555 } 1556 1557 /* if free space is not available, make more free space */ 1558 if (current_space->local_remaining<nzk) { 1559 i = am - k + 1; /* num of unfactored rows */ 1560 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1561 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1562 reallocs++; 1563 } 1564 1565 /* copy data into free space, then initialize lnk */ 1566 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1567 1568 /* add the k-th row into il and jl */ 1569 if (nzk-1 > 0){ 1570 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1571 jl[k] = jl[i]; jl[i] = k; 1572 il[k] = ui[k] + 1; 1573 } 1574 uj_ptr[k] = current_space->array; 1575 current_space->array += nzk; 1576 current_space->local_used += nzk; 1577 current_space->local_remaining -= nzk; 1578 1579 ui[k+1] = ui[k] + nzk; 1580 } 1581 1582 if (ai[am] != 0) { 1583 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1584 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1585 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1586 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1587 } else { 1588 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1589 } 1590 1591 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1592 ierr = PetscFree(jl);CHKERRQ(ierr); 1593 1594 /* destroy list of free space and other temporary array(s) */ 1595 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1596 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1597 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1598 1599 /* put together the new matrix in MATSEQSBAIJ format */ 1600 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1601 ierr = MatSetType(*fact,MATSEQSBAIJ);CHKERRQ(ierr); 1602 ierr = MatSeqSBAIJSetPreallocation(*fact,1,0,PETSC_NULL);CHKERRQ(ierr); 1603 1604 b = (Mat_SeqSBAIJ*)(*fact)->data; 1605 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1606 b->singlemalloc = PETSC_FALSE; 1607 /* the next line frees the default space generated by the Create() */ 1608 ierr = PetscFree(b->a);CHKERRQ(ierr); 1609 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1610 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1611 b->j = uj; 1612 b->i = ui; 1613 b->diag = 0; 1614 b->ilen = 0; 1615 b->imax = 0; 1616 b->row = perm; 1617 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1618 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1619 b->icol = perm; 1620 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1621 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1622 /* In b structure: Free imax, ilen, old a, old j. 1623 Allocate idnew, solve_work, new a, new j */ 1624 PetscLogObjectMemory(*fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 1625 b->maxnz = b->nz = ui[am]; 1626 1627 (*fact)->factor = FACTOR_CHOLESKY; 1628 (*fact)->info.factor_mallocs = reallocs; 1629 (*fact)->info.fill_ratio_given = fill; 1630 if (ai[am] != 0) { 1631 (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1632 } else { 1633 (*fact)->info.fill_ratio_needed = 0.0; 1634 } 1635 1636 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1637 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1638 ierr = PetscObjectComposeFunction((PetscObject) *fact,"MatCholeskyFactorNumeric_NaturalOrdering","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);CHKERRQ(ierr); 1639 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_NaturalOrdering; 1640 PetscFunctionReturn(0); 1641 } 1642