1 /*$Id: aijfact.c,v 1.167 2001/09/11 16:32:26 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 #include "src/inline/spops.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 10 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 11 { 12 PetscFunctionBegin; 13 14 SETERRQ(PETSC_ERR_SUP,"Code not written"); 15 #if !defined(PETSC_USE_DEBUG) 16 PetscFunctionReturn(0); 17 #endif 18 } 19 20 21 EXTERN int MatMarkDiagonal_SeqAIJ(Mat); 22 EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth); 23 24 EXTERN int SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*); 25 EXTERN int SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*); 26 EXTERN int SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*); 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 int MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(1,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 int *c,*r,*ic,ierr,i,n = A->m; 63 int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 64 int *ordcol,*iwk,*iperm,*jw; 65 int jmax,lfill,job,*o_i,*o_j; 66 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 67 PetscReal permtol,af; 68 69 PetscFunctionBegin; 70 71 if (info->dt == PETSC_DEFAULT) info->dt = .005; 72 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax); 73 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 74 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 75 lfill = (int)(info->dtcount/2.0); 76 jmax = (int)(info->fill*a->nz); 77 permtol = info->dtcol; 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(int),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(int),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(int),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(int),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(int),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(int),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 149 if (ierr) { 150 switch (ierr) { 151 case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 152 case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 153 case -5: SETERRQ(1,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(1,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax); 156 default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(int),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 223 (*fact)->factor = FACTOR_LU; 224 (*fact)->assembled = PETSC_TRUE; 225 226 b = (Mat_SeqAIJ*)(*fact)->data; 227 ierr = PetscFree(b->imax);CHKERRQ(ierr); 228 b->sorted = PETSC_FALSE; 229 b->singlemalloc = PETSC_FALSE; 230 /* the next line frees the default space generated by the MatCreate() */ 231 ierr = PetscFree(b->a);CHKERRQ(ierr); 232 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 247 248 /* check out for identical nodes. If found, use inode functions */ 249 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 250 251 af = ((double)b->nz)/((double)a->nz) + .001; 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 /* 262 Factorization code for AIJ format. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 266 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 267 { 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 269 IS isicol; 270 int *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j; 271 int *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 272 int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 273 PetscReal f; 274 275 PetscFunctionBegin; 276 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 277 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 278 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 279 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 280 281 /* get new row pointers */ 282 ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 283 ainew[0] = 0; 284 /* don't know how many column pointers are needed so estimate */ 285 f = info->fill; 286 jmax = (int)(f*ai[n]+1); 287 ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 288 /* fill is a linked list of nonzeros in active row */ 289 ierr = PetscMalloc((2*n+1)*sizeof(int),&fill);CHKERRQ(ierr); 290 im = fill + n + 1; 291 /* idnew is location of diagonal in factor */ 292 ierr = PetscMalloc((n+1)*sizeof(int),&idnew);CHKERRQ(ierr); 293 idnew[0] = 0; 294 295 for (i=0; i<n; i++) { 296 /* first copy previous fill into linked list */ 297 nnz = nz = ai[r[i]+1] - ai[r[i]]; 298 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 299 ajtmp = aj + ai[r[i]]; 300 fill[n] = n; 301 while (nz--) { 302 fm = n; 303 idx = ic[*ajtmp++]; 304 do { 305 m = fm; 306 fm = fill[m]; 307 } while (fm < idx); 308 fill[m] = idx; 309 fill[idx] = fm; 310 } 311 row = fill[n]; 312 while (row < i) { 313 ajtmp = ajnew + idnew[row] + 1; 314 nzbd = 1 + idnew[row] - ainew[row]; 315 nz = im[row] - nzbd; 316 fm = row; 317 while (nz-- > 0) { 318 idx = *ajtmp++ ; 319 nzbd++; 320 if (idx == i) im[row] = nzbd; 321 do { 322 m = fm; 323 fm = fill[m]; 324 } while (fm < idx); 325 if (fm != idx) { 326 fill[m] = idx; 327 fill[idx] = fm; 328 fm = idx; 329 nnz++; 330 } 331 } 332 row = fill[row]; 333 } 334 /* copy new filled row into permanent storage */ 335 ainew[i+1] = ainew[i] + nnz; 336 if (ainew[i+1] > jmax) { 337 338 /* estimate how much additional space we will need */ 339 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 340 /* just double the memory each time */ 341 int maxadd = jmax; 342 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 343 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 344 jmax += maxadd; 345 346 /* allocate a longer ajnew */ 347 ierr = PetscMalloc(jmax*sizeof(int),&ajtmp);CHKERRQ(ierr); 348 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(int));CHKERRQ(ierr); 349 ierr = PetscFree(ajnew);CHKERRQ(ierr); 350 ajnew = ajtmp; 351 realloc++; /* count how many times we realloc */ 352 } 353 ajtmp = ajnew + ainew[i]; 354 fm = fill[n]; 355 nzi = 0; 356 im[i] = nnz; 357 while (nnz--) { 358 if (fm < i) nzi++; 359 *ajtmp++ = fm ; 360 fm = fill[fm]; 361 } 362 idnew[i] = ainew[i] + nzi; 363 } 364 if (ai[n] != 0) { 365 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 366 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 370 } else { 371 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 372 } 373 374 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 375 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 376 377 ierr = PetscFree(fill);CHKERRQ(ierr); 378 379 /* put together the new matrix */ 380 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 381 PetscLogObjectParent(*B,isicol); 382 b = (Mat_SeqAIJ*)(*B)->data; 383 ierr = PetscFree(b->imax);CHKERRQ(ierr); 384 b->singlemalloc = PETSC_FALSE; 385 /* the next line frees the default space generated by the Create() */ 386 ierr = PetscFree(b->a);CHKERRQ(ierr); 387 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 388 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 389 b->j = ajnew; 390 b->i = ainew; 391 b->diag = idnew; 392 b->ilen = 0; 393 b->imax = 0; 394 b->row = isrow; 395 b->col = iscol; 396 b->lu_damping = info->damping; 397 b->lu_zeropivot = info->zeropivot; 398 b->lu_shift = info->shift; 399 b->lu_shift_fraction = info->shift_fraction; 400 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 401 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 402 b->icol = isicol; 403 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 404 /* In b structure: Free imax, ilen, old a, old j. 405 Allocate idnew, solve_work, new a, new j */ 406 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(int)+sizeof(PetscScalar))); 407 b->maxnz = b->nz = ainew[n] ; 408 409 (*B)->factor = FACTOR_LU; 410 (*B)->info.factor_mallocs = realloc; 411 (*B)->info.fill_ratio_given = f; 412 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 413 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 414 415 if (ai[n] != 0) { 416 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 417 } else { 418 (*B)->info.fill_ratio_needed = 0.0; 419 } 420 PetscFunctionReturn(0); 421 } 422 /* ----------------------------------------------------------- */ 423 EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth); 424 425 #undef __FUNCT__ 426 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 427 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 428 { 429 Mat C = *B; 430 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 431 IS isrow = b->row,isicol = b->icol; 432 int *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j; 433 int *ajtmpold,*ajtmp,nz,row,*ics; 434 int *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0; 435 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 436 PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d; 437 PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.; 438 PetscTruth damp,lushift; 439 440 PetscFunctionBegin; 441 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 442 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 443 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 444 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 445 rtmps = rtmp; ics = ic; 446 447 if (!a->diag) { 448 ierr = MatMarkDiagonal_SeqAIJ(A); CHKERRQ(ierr); 449 } 450 451 if (b->lu_shift) { /* set max shift */ 452 int *aai = a->i,*ddiag = a->diag; 453 shift_top = 0; 454 for (i=0; i<n; i++) { 455 d = PetscAbsScalar((a->a)[ddiag[i]]); 456 /* calculate amt of shift needed for this row */ 457 if (d<0) { 458 row_shift = 0; 459 } else { 460 row_shift = -2*d; 461 } 462 v = a->a+aai[i]; 463 for (j=0; j<aai[i+1]-aai[i]; j++) 464 row_shift += PetscAbsScalar(v[j]); 465 if (row_shift>shift_top) shift_top = row_shift; 466 } 467 } 468 469 shift_fraction = 0; shift_amount = 0; 470 do { 471 damp = PETSC_FALSE; 472 lushift = PETSC_FALSE; 473 for (i=0; i<n; i++) { 474 nz = ai[i+1] - ai[i]; 475 ajtmp = aj + ai[i]; 476 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 477 478 /* load in initial (unfactored row) */ 479 nz = a->i[r[i]+1] - a->i[r[i]]; 480 ajtmpold = a->j + a->i[r[i]]; 481 v = a->a + a->i[r[i]]; 482 for (j=0; j<nz; j++) { 483 rtmp[ics[ajtmpold[j]]] = v[j]; 484 } 485 rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */ 486 487 row = *ajtmp++ ; 488 while (row < i) { 489 pc = rtmp + row; 490 if (*pc != 0.0) { 491 pv = b->a + diag_offset[row] ; 492 pj = b->j + diag_offset[row] + 1; 493 multiplier = *pc / *pv++; 494 *pc = multiplier; 495 nz = ai[row+1] - diag_offset[row] - 1; 496 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 497 PetscLogFlops(2*nz); 498 } 499 row = *ajtmp++; 500 } 501 /* finished row so stick it into b->a */ 502 pv = b->a + ai[i] ; 503 pj = b->j + ai[i] ; 504 nz = ai[i+1] - ai[i]; 505 diag = diag_offset[i] - ai[i]; 506 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 507 rs = 0.0; 508 for (j=0; j<nz; j++) { 509 pv[j] = rtmps[pj[j]]; 510 if (j != diag) rs += PetscAbsScalar(pv[j]); 511 } 512 #define MAX_NSHIFT 5 513 if (PetscRealPart(pv[diag]) < zeropivot*rs && b->lu_shift) { 514 if (nshift>MAX_NSHIFT) { 515 SETERRQ(1,"Unable to determine shift to enforce positive definite preconditioner"); 516 } else if (nshift==MAX_NSHIFT) { 517 shift_fraction = shift_hi; 518 } else { 519 shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 520 } 521 shift_amount = shift_fraction * shift_top; 522 lushift = PETSC_TRUE; 523 nshift++; 524 break; 525 } 526 if (PetscAbsScalar(pv[diag]) < zeropivot*rs) { 527 if (damping) { 528 if (ndamp) damping *= 2.0; 529 damp = PETSC_TRUE; 530 ndamp++; 531 break; 532 } else { 533 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 534 } 535 } 536 } 537 if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) { 538 /* 539 * if not already shifting up & shifting & started shifting & can refine, 540 * then try lower shift 541 */ 542 shift_hi = shift_fraction; 543 shift_fraction = (shift_hi+shift_lo)/2.; 544 shift_amount = shift_fraction * shift_top; 545 lushift = PETSC_TRUE; 546 nshift++; 547 } 548 } while (damp || lushift); 549 550 /* invert diagonal entries for simplier triangular solves */ 551 for (i=0; i<n; i++) { 552 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 553 } 554 555 ierr = PetscFree(rtmp);CHKERRQ(ierr); 556 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 557 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 558 C->factor = FACTOR_LU; 559 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 560 C->assembled = PETSC_TRUE; 561 PetscLogFlops(C->n); 562 if (ndamp) { 563 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %g\n",ndamp,damping); 564 } 565 if (nshift) { 566 b->lu_shift_fraction = shift_fraction; 567 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction\n",shift_fraction); 568 } 569 PetscFunctionReturn(0); 570 } 571 /* ----------------------------------------------------------- */ 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatLUFactor_SeqAIJ" 574 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 575 { 576 int ierr; 577 Mat C; 578 579 PetscFunctionBegin; 580 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 581 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 582 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 583 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 584 PetscFunctionReturn(0); 585 } 586 /* ----------------------------------------------------------- */ 587 #undef __FUNCT__ 588 #define __FUNCT__ "MatSolve_SeqAIJ" 589 int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 590 { 591 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 592 IS iscol = a->col,isrow = a->row; 593 int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 594 int nz,*rout,*cout; 595 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 596 597 PetscFunctionBegin; 598 if (!n) PetscFunctionReturn(0); 599 600 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 601 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 602 tmp = a->solve_work; 603 604 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 605 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 606 607 /* forward solve the lower triangular */ 608 tmp[0] = b[*r++]; 609 tmps = tmp; 610 for (i=1; i<n; i++) { 611 v = aa + ai[i] ; 612 vi = aj + ai[i] ; 613 nz = a->diag[i] - ai[i]; 614 sum = b[*r++]; 615 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 616 tmp[i] = sum; 617 } 618 619 /* backward solve the upper triangular */ 620 for (i=n-1; i>=0; i--){ 621 v = aa + a->diag[i] + 1; 622 vi = aj + a->diag[i] + 1; 623 nz = ai[i+1] - a->diag[i] - 1; 624 sum = tmp[i]; 625 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 626 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 627 } 628 629 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 630 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 631 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 632 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 633 PetscLogFlops(2*a->nz - A->n); 634 PetscFunctionReturn(0); 635 } 636 637 /* ----------------------------------------------------------- */ 638 #undef __FUNCT__ 639 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 640 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 641 { 642 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 643 int n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr; 644 PetscScalar *x,*b,*aa = a->a; 645 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 646 int adiag_i,i,*vi,nz,ai_i; 647 PetscScalar *v,sum; 648 #endif 649 650 PetscFunctionBegin; 651 if (!n) PetscFunctionReturn(0); 652 653 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 654 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 655 656 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 657 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 658 #else 659 /* forward solve the lower triangular */ 660 x[0] = b[0]; 661 for (i=1; i<n; i++) { 662 ai_i = ai[i]; 663 v = aa + ai_i; 664 vi = aj + ai_i; 665 nz = adiag[i] - ai_i; 666 sum = b[i]; 667 while (nz--) sum -= *v++ * x[*vi++]; 668 x[i] = sum; 669 } 670 671 /* backward solve the upper triangular */ 672 for (i=n-1; i>=0; i--){ 673 adiag_i = adiag[i]; 674 v = aa + adiag_i + 1; 675 vi = aj + adiag_i + 1; 676 nz = ai[i+1] - adiag_i - 1; 677 sum = x[i]; 678 while (nz--) sum -= *v++ * x[*vi++]; 679 x[i] = sum*aa[adiag_i]; 680 } 681 #endif 682 PetscLogFlops(2*a->nz - A->n); 683 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 684 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 685 PetscFunctionReturn(0); 686 } 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 690 int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 691 { 692 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 693 IS iscol = a->col,isrow = a->row; 694 int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 695 int nz,*rout,*cout; 696 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 697 698 PetscFunctionBegin; 699 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 700 701 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 702 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 703 tmp = a->solve_work; 704 705 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 706 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 707 708 /* forward solve the lower triangular */ 709 tmp[0] = b[*r++]; 710 for (i=1; i<n; i++) { 711 v = aa + ai[i] ; 712 vi = aj + ai[i] ; 713 nz = a->diag[i] - ai[i]; 714 sum = b[*r++]; 715 while (nz--) sum -= *v++ * tmp[*vi++ ]; 716 tmp[i] = sum; 717 } 718 719 /* backward solve the upper triangular */ 720 for (i=n-1; i>=0; i--){ 721 v = aa + a->diag[i] + 1; 722 vi = aj + a->diag[i] + 1; 723 nz = ai[i+1] - a->diag[i] - 1; 724 sum = tmp[i]; 725 while (nz--) sum -= *v++ * tmp[*vi++ ]; 726 tmp[i] = sum*aa[a->diag[i]]; 727 x[*c--] += tmp[i]; 728 } 729 730 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 731 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 732 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 733 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 734 PetscLogFlops(2*a->nz); 735 736 PetscFunctionReturn(0); 737 } 738 /* -------------------------------------------------------------------*/ 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 741 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 742 { 743 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 744 IS iscol = a->col,isrow = a->row; 745 int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 746 int nz,*rout,*cout,*diag = a->diag; 747 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 748 749 PetscFunctionBegin; 750 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 751 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 752 tmp = a->solve_work; 753 754 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 755 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 756 757 /* copy the b into temp work space according to permutation */ 758 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 759 760 /* forward solve the U^T */ 761 for (i=0; i<n; i++) { 762 v = aa + diag[i] ; 763 vi = aj + diag[i] + 1; 764 nz = ai[i+1] - diag[i] - 1; 765 s1 = tmp[i]; 766 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 767 while (nz--) { 768 tmp[*vi++ ] -= (*v++)*s1; 769 } 770 tmp[i] = s1; 771 } 772 773 /* backward solve the L^T */ 774 for (i=n-1; i>=0; i--){ 775 v = aa + diag[i] - 1 ; 776 vi = aj + diag[i] - 1 ; 777 nz = diag[i] - ai[i]; 778 s1 = tmp[i]; 779 while (nz--) { 780 tmp[*vi-- ] -= (*v--)*s1; 781 } 782 } 783 784 /* copy tmp into x according to permutation */ 785 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 786 787 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 788 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 789 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 790 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 791 792 PetscLogFlops(2*a->nz-A->n); 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 798 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 799 { 800 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 801 IS iscol = a->col,isrow = a->row; 802 int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 803 int nz,*rout,*cout,*diag = a->diag; 804 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 805 806 PetscFunctionBegin; 807 if (zz != xx) VecCopy(zz,xx); 808 809 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 810 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 811 tmp = a->solve_work; 812 813 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 814 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 815 816 /* copy the b into temp work space according to permutation */ 817 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 818 819 /* forward solve the U^T */ 820 for (i=0; i<n; i++) { 821 v = aa + diag[i] ; 822 vi = aj + diag[i] + 1; 823 nz = ai[i+1] - diag[i] - 1; 824 tmp[i] *= *v++; 825 while (nz--) { 826 tmp[*vi++ ] -= (*v++)*tmp[i]; 827 } 828 } 829 830 /* backward solve the L^T */ 831 for (i=n-1; i>=0; i--){ 832 v = aa + diag[i] - 1 ; 833 vi = aj + diag[i] - 1 ; 834 nz = diag[i] - ai[i]; 835 while (nz--) { 836 tmp[*vi-- ] -= (*v--)*tmp[i]; 837 } 838 } 839 840 /* copy tmp into x according to permutation */ 841 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 842 843 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 844 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 845 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 846 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 847 848 PetscLogFlops(2*a->nz); 849 PetscFunctionReturn(0); 850 } 851 /* ----------------------------------------------------------------*/ 852 EXTERN int MatMissingDiagonal_SeqAIJ(Mat); 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 856 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 857 { 858 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 859 IS isicol; 860 int *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j; 861 int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 862 int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0; 863 int incrlev,nnz,i,levels,diagonal_fill; 864 PetscTruth col_identity,row_identity; 865 PetscReal f; 866 867 PetscFunctionBegin; 868 f = info->fill; 869 levels = (int)info->levels; 870 diagonal_fill = (int)info->diagonal_fill; 871 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 872 873 /* special case that simply copies fill pattern */ 874 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 875 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 876 if (!levels && row_identity && col_identity) { 877 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 878 (*fact)->factor = FACTOR_LU; 879 b = (Mat_SeqAIJ*)(*fact)->data; 880 if (!b->diag) { 881 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 882 } 883 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 884 b->row = isrow; 885 b->col = iscol; 886 b->icol = isicol; 887 b->lu_damping = info->damping; 888 b->lu_zeropivot = info->zeropivot; 889 b->lu_shift = info->shift; 890 b->lu_shift_fraction= info->shift_fraction; 891 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 892 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 893 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 894 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 895 PetscFunctionReturn(0); 896 } 897 898 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 899 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 900 901 /* get new row pointers */ 902 ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 903 ainew[0] = 0; 904 /* don't know how many column pointers are needed so estimate */ 905 jmax = (int)(f*(ai[n]+1)); 906 ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 907 /* ajfill is level of fill for each fill entry */ 908 ierr = PetscMalloc((jmax)*sizeof(int),&ajfill);CHKERRQ(ierr); 909 /* fill is a linked list of nonzeros in active row */ 910 ierr = PetscMalloc((n+1)*sizeof(int),&fill);CHKERRQ(ierr); 911 /* im is level for each filled value */ 912 ierr = PetscMalloc((n+1)*sizeof(int),&im);CHKERRQ(ierr); 913 /* dloc is location of diagonal in factor */ 914 ierr = PetscMalloc((n+1)*sizeof(int),&dloc);CHKERRQ(ierr); 915 dloc[0] = 0; 916 for (prow=0; prow<n; prow++) { 917 918 /* copy current row into linked list */ 919 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 920 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 921 xi = aj + ai[r[prow]] ; 922 fill[n] = n; 923 fill[prow] = -1; /* marker to indicate if diagonal exists */ 924 while (nz--) { 925 fm = n; 926 idx = ic[*xi++ ]; 927 do { 928 m = fm; 929 fm = fill[m]; 930 } while (fm < idx); 931 fill[m] = idx; 932 fill[idx] = fm; 933 im[idx] = 0; 934 } 935 936 /* make sure diagonal entry is included */ 937 if (diagonal_fill && fill[prow] == -1) { 938 fm = n; 939 while (fill[fm] < prow) fm = fill[fm]; 940 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 941 fill[fm] = prow; 942 im[prow] = 0; 943 nzf++; 944 dcount++; 945 } 946 947 nzi = 0; 948 row = fill[n]; 949 while (row < prow) { 950 incrlev = im[row] + 1; 951 nz = dloc[row]; 952 xi = ajnew + ainew[row] + nz + 1; 953 flev = ajfill + ainew[row] + nz + 1; 954 nnz = ainew[row+1] - ainew[row] - nz - 1; 955 fm = row; 956 while (nnz-- > 0) { 957 idx = *xi++ ; 958 if (*flev + incrlev > levels) { 959 flev++; 960 continue; 961 } 962 do { 963 m = fm; 964 fm = fill[m]; 965 } while (fm < idx); 966 if (fm != idx) { 967 im[idx] = *flev + incrlev; 968 fill[m] = idx; 969 fill[idx] = fm; 970 fm = idx; 971 nzf++; 972 } else { 973 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 974 } 975 flev++; 976 } 977 row = fill[row]; 978 nzi++; 979 } 980 /* copy new filled row into permanent storage */ 981 ainew[prow+1] = ainew[prow] + nzf; 982 if (ainew[prow+1] > jmax) { 983 984 /* estimate how much additional space we will need */ 985 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 986 /* just double the memory each time */ 987 /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 988 int maxadd = jmax; 989 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 990 jmax += maxadd; 991 992 /* allocate a longer ajnew and ajfill */ 993 ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 994 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(int));CHKERRQ(ierr); 995 ierr = PetscFree(ajnew);CHKERRQ(ierr); 996 ajnew = xi; 997 ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 998 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(int));CHKERRQ(ierr); 999 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1000 ajfill = xi; 1001 realloc++; /* count how many times we realloc */ 1002 } 1003 xi = ajnew + ainew[prow] ; 1004 flev = ajfill + ainew[prow] ; 1005 dloc[prow] = nzi; 1006 fm = fill[n]; 1007 while (nzf--) { 1008 *xi++ = fm ; 1009 *flev++ = im[fm]; 1010 fm = fill[fm]; 1011 } 1012 /* make sure row has diagonal entry */ 1013 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1014 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrix\n\ 1015 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1016 } 1017 } 1018 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1019 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1020 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1021 ierr = PetscFree(fill);CHKERRQ(ierr); 1022 ierr = PetscFree(im);CHKERRQ(ierr); 1023 1024 { 1025 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1026 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1027 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1028 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1029 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1030 if (diagonal_fill) { 1031 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount); 1032 } 1033 } 1034 1035 /* put together the new matrix */ 1036 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1037 PetscLogObjectParent(*fact,isicol); 1038 b = (Mat_SeqAIJ*)(*fact)->data; 1039 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1040 b->singlemalloc = PETSC_FALSE; 1041 len = (ainew[n] )*sizeof(PetscScalar); 1042 /* the next line frees the default space generated by the Create() */ 1043 ierr = PetscFree(b->a);CHKERRQ(ierr); 1044 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1045 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1046 b->j = ajnew; 1047 b->i = ainew; 1048 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1049 b->diag = dloc; 1050 b->ilen = 0; 1051 b->imax = 0; 1052 b->row = isrow; 1053 b->col = iscol; 1054 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1055 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1056 b->icol = isicol; 1057 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1058 /* In b structure: Free imax, ilen, old a, old j. 1059 Allocate dloc, solve_work, new a, new j */ 1060 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(int)+sizeof(PetscScalar))); 1061 b->maxnz = b->nz = ainew[n] ; 1062 b->lu_damping = info->damping; 1063 b->lu_shift = info->shift; 1064 b->lu_shift_fraction = info->shift_fraction; 1065 b->lu_zeropivot = info->zeropivot; 1066 (*fact)->factor = FACTOR_LU; 1067 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1068 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1069 1070 (*fact)->info.factor_mallocs = realloc; 1071 (*fact)->info.fill_ratio_given = f; 1072 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #include "src/mat/impls/sbaij/seq/sbaij.h" 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1079 int MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact) 1080 { 1081 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1082 int ierr; 1083 1084 PetscFunctionBegin; 1085 if (!a->sbaijMat){ 1086 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1087 } 1088 1089 ierr = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(a->sbaijMat,fact);CHKERRQ(ierr); 1090 ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 1091 a->sbaijMat = PETSC_NULL; 1092 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1098 int MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1099 { 1100 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1101 int ierr; 1102 PetscTruth perm_identity; 1103 1104 PetscFunctionBegin; 1105 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1106 if (!perm_identity){ 1107 SETERRQ(1,"Non-identity permutation is not supported yet"); 1108 } 1109 if (!a->sbaijMat){ 1110 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1111 } 1112 1113 ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1114 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1115 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1121 int MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1122 { 1123 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1124 int ierr; 1125 PetscTruth perm_identity; 1126 1127 PetscFunctionBegin; 1128 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1129 if (!perm_identity){ 1130 SETERRQ(1,"Non-identity permutation is not supported yet"); 1131 } 1132 if (!a->sbaijMat){ 1133 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1134 } 1135 1136 ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1137 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1138 1139 PetscFunctionReturn(0); 1140 } 1141