1 /*$Id: aijfact.c,v 1.167 2001/09/11 16:32:26 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 9 int MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 10 { 11 PetscFunctionBegin; 12 13 SETERRQ(PETSC_ERR_SUP,"Code not written"); 14 #if !defined(PETSC_USE_DEBUG) 15 PetscFunctionReturn(0); 16 #endif 17 } 18 19 20 EXTERN int MatMarkDiagonal_SeqAIJ(Mat); 21 EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth); 22 23 EXTERN int SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*); 24 EXTERN int SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*); 25 EXTERN int SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*); 26 27 #undef __FUNCT__ 28 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 29 /* ------------------------------------------------------------ 30 31 This interface was contribed by Tony Caola 32 33 This routine is an interface to the pivoting drop-tolerance 34 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 35 SPARSEKIT2. 36 37 The SPARSEKIT2 routines used here are covered by the GNU 38 copyright; see the file gnu in this directory. 39 40 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 41 help in getting this routine ironed out. 42 43 The major drawback to this routine is that if info->fill is 44 not large enough it fails rather than allocating more space; 45 this can be fixed by hacking/improving the f2c version of 46 Yousef Saad's code. 47 48 ------------------------------------------------------------ 49 */ 50 int MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 51 { 52 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 53 PetscFunctionBegin; 54 SETERRQ(1,"This distribution does not include GNU Copyright code\n\ 55 You can obtain the drop tolerance routines by installing PETSc from\n\ 56 www.mcs.anl.gov/petsc\n"); 57 #else 58 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 59 IS iscolf,isicol,isirow; 60 PetscTruth reorder; 61 int *c,*r,*ic,ierr,i,n = A->m; 62 int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63 int *ordcol,*iwk,*iperm,*jw; 64 int jmax,lfill,job,*o_i,*o_j; 65 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66 PetscReal permtol,af; 67 68 PetscFunctionBegin; 69 70 if (info->dt == PETSC_DEFAULT) info->dt = .005; 71 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax); 72 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 73 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 74 lfill = (int)(info->dtcount/2.0); 75 jmax = (int)(info->fill*a->nz); 76 permtol = info->dtcol; 77 78 79 /* ------------------------------------------------------------ 80 If reorder=.TRUE., then the original matrix has to be 81 reordered to reflect the user selected ordering scheme, and 82 then de-reordered so it is in it's original format. 83 Because Saad's dperm() is NOT in place, we have to copy 84 the original matrix and allocate more storage. . . 85 ------------------------------------------------------------ 86 */ 87 88 /* set reorder to true if either isrow or iscol is not identity */ 89 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 90 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 91 reorder = PetscNot(reorder); 92 93 94 /* storage for ilu factor */ 95 ierr = PetscMalloc((n+1)*sizeof(int),&new_i);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(int),&new_j);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 98 ierr = PetscMalloc(n*sizeof(int),&ordcol);CHKERRQ(ierr); 99 100 /* ------------------------------------------------------------ 101 Make sure that everything is Fortran formatted (1-Based) 102 ------------------------------------------------------------ 103 */ 104 for (i=old_i[0];i<old_i[n];i++) { 105 old_j[i]++; 106 } 107 for(i=0;i<n+1;i++) { 108 old_i[i]++; 109 }; 110 111 112 if (reorder) { 113 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 114 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 115 for(i=0;i<n;i++) { 116 r[i] = r[i]+1; 117 c[i] = c[i]+1; 118 } 119 ierr = PetscMalloc((n+1)*sizeof(int),&old_i2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 122 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 123 for (i=0;i<n;i++) { 124 r[i] = r[i]-1; 125 c[i] = c[i]-1; 126 } 127 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 128 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(int),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(int),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 148 if (ierr) { 149 switch (ierr) { 150 case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 151 case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 152 case -5: SETERRQ(1,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(1,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax); 155 default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(int),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 222 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 223 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 224 (*fact)->factor = FACTOR_LU; 225 (*fact)->assembled = PETSC_TRUE; 226 227 b = (Mat_SeqAIJ*)(*fact)->data; 228 ierr = PetscFree(b->imax);CHKERRQ(ierr); 229 b->sorted = PETSC_FALSE; 230 b->singlemalloc = PETSC_FALSE; 231 /* the next line frees the default space generated by the MatCreate() */ 232 ierr = PetscFree(b->a);CHKERRQ(ierr); 233 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 234 b->a = new_a; 235 b->j = new_j; 236 b->i = new_i; 237 b->ilen = 0; 238 b->imax = 0; 239 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 240 b->row = isirow; 241 b->col = iscolf; 242 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 243 b->maxnz = b->nz = new_i[n]; 244 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 245 (*fact)->info.factor_mallocs = 0; 246 247 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 248 249 /* check out for identical nodes. If found, use inode functions */ 250 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 251 252 af = ((double)b->nz)/((double)a->nz) + .001; 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 256 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 257 258 PetscFunctionReturn(0); 259 #endif 260 } 261 262 /* 263 Factorization code for AIJ format. 264 */ 265 #undef __FUNCT__ 266 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 267 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 268 { 269 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 270 IS isicol; 271 int *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j; 272 int *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 273 int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 274 PetscReal f; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 284 ainew[0] = 0; 285 /* don't know how many column pointers are needed so estimate */ 286 f = info->fill; 287 jmax = (int)(f*ai[n]+1); 288 ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 289 /* fill is a linked list of nonzeros in active row */ 290 ierr = PetscMalloc((2*n+1)*sizeof(int),&fill);CHKERRQ(ierr); 291 im = fill + n + 1; 292 /* idnew is location of diagonal in factor */ 293 ierr = PetscMalloc((n+1)*sizeof(int),&idnew);CHKERRQ(ierr); 294 idnew[0] = 0; 295 296 for (i=0; i<n; i++) { 297 /* first copy previous fill into linked list */ 298 nnz = nz = ai[r[i]+1] - ai[r[i]]; 299 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 300 ajtmp = aj + ai[r[i]]; 301 fill[n] = n; 302 while (nz--) { 303 fm = n; 304 idx = ic[*ajtmp++]; 305 do { 306 m = fm; 307 fm = fill[m]; 308 } while (fm < idx); 309 fill[m] = idx; 310 fill[idx] = fm; 311 } 312 row = fill[n]; 313 while (row < i) { 314 ajtmp = ajnew + idnew[row] + 1; 315 nzbd = 1 + idnew[row] - ainew[row]; 316 nz = im[row] - nzbd; 317 fm = row; 318 while (nz-- > 0) { 319 idx = *ajtmp++ ; 320 nzbd++; 321 if (idx == i) im[row] = nzbd; 322 do { 323 m = fm; 324 fm = fill[m]; 325 } while (fm < idx); 326 if (fm != idx) { 327 fill[m] = idx; 328 fill[idx] = fm; 329 fm = idx; 330 nnz++; 331 } 332 } 333 row = fill[row]; 334 } 335 /* copy new filled row into permanent storage */ 336 ainew[i+1] = ainew[i] + nnz; 337 if (ainew[i+1] > jmax) { 338 339 /* estimate how much additional space we will need */ 340 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 341 /* just double the memory each time */ 342 int maxadd = jmax; 343 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 344 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 345 jmax += maxadd; 346 347 /* allocate a longer ajnew */ 348 ierr = PetscMalloc(jmax*sizeof(int),&ajtmp);CHKERRQ(ierr); 349 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(int));CHKERRQ(ierr); 350 ierr = PetscFree(ajnew);CHKERRQ(ierr); 351 ajnew = ajtmp; 352 realloc++; /* count how many times we realloc */ 353 } 354 ajtmp = ajnew + ainew[i]; 355 fm = fill[n]; 356 nzi = 0; 357 im[i] = nnz; 358 while (nnz--) { 359 if (fm < i) nzi++; 360 *ajtmp++ = fm ; 361 fm = fill[fm]; 362 } 363 idnew[i] = ainew[i] + nzi; 364 } 365 if (ai[n] != 0) { 366 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 370 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 371 } else { 372 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 373 } 374 375 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 376 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 377 378 ierr = PetscFree(fill);CHKERRQ(ierr); 379 380 /* put together the new matrix */ 381 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 382 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 383 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 384 PetscLogObjectParent(*B,isicol); 385 b = (Mat_SeqAIJ*)(*B)->data; 386 ierr = PetscFree(b->imax);CHKERRQ(ierr); 387 b->singlemalloc = PETSC_FALSE; 388 /* the next line frees the default space generated by the Create() */ 389 ierr = PetscFree(b->a);CHKERRQ(ierr); 390 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 391 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 392 b->j = ajnew; 393 b->i = ainew; 394 b->diag = idnew; 395 b->ilen = 0; 396 b->imax = 0; 397 b->row = isrow; 398 b->col = iscol; 399 b->lu_damping = info->damping; 400 b->lu_zeropivot = info->zeropivot; 401 b->lu_shift = info->shift; 402 b->lu_shift_fraction = info->shift_fraction; 403 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 404 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 405 b->icol = isicol; 406 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 407 /* In b structure: Free imax, ilen, old a, old j. 408 Allocate idnew, solve_work, new a, new j */ 409 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(int)+sizeof(PetscScalar))); 410 b->maxnz = b->nz = ainew[n] ; 411 412 (*B)->factor = FACTOR_LU; 413 (*B)->info.factor_mallocs = realloc; 414 (*B)->info.fill_ratio_given = f; 415 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 416 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 417 418 if (ai[n] != 0) { 419 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 420 } else { 421 (*B)->info.fill_ratio_needed = 0.0; 422 } 423 PetscFunctionReturn(0); 424 } 425 /* ----------------------------------------------------------- */ 426 EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth); 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 430 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 431 { 432 Mat C = *B; 433 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 434 IS isrow = b->row,isicol = b->icol; 435 int *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j; 436 int *ajtmpold,*ajtmp,nz,row,*ics; 437 int *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0; 438 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 439 PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d; 440 PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.; 441 PetscTruth damp,lushift; 442 443 PetscFunctionBegin; 444 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 445 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 446 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 447 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 448 rtmps = rtmp; ics = ic; 449 450 if (!a->diag) { 451 ierr = MatMarkDiagonal_SeqAIJ(A); CHKERRQ(ierr); 452 } 453 454 if (b->lu_shift) { /* set max shift */ 455 int *aai = a->i,*ddiag = a->diag; 456 shift_top = 0; 457 for (i=0; i<n; i++) { 458 d = PetscAbsScalar((a->a)[ddiag[i]]); 459 /* calculate amt of shift needed for this row */ 460 if (d<=0) { 461 row_shift = 0; 462 } else { 463 row_shift = -2*d; 464 } 465 v = a->a+aai[i]; 466 for (j=0; j<aai[i+1]-aai[i]; j++) 467 row_shift += PetscAbsScalar(v[j]); 468 if (row_shift>shift_top) shift_top = row_shift; 469 } 470 } 471 472 shift_fraction = 0; shift_amount = 0; 473 do { 474 damp = PETSC_FALSE; 475 lushift = PETSC_FALSE; 476 for (i=0; i<n; i++) { 477 nz = ai[i+1] - ai[i]; 478 ajtmp = aj + ai[i]; 479 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 480 481 /* load in initial (unfactored row) */ 482 nz = a->i[r[i]+1] - a->i[r[i]]; 483 ajtmpold = a->j + a->i[r[i]]; 484 v = a->a + a->i[r[i]]; 485 for (j=0; j<nz; j++) { 486 rtmp[ics[ajtmpold[j]]] = v[j]; 487 } 488 rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */ 489 490 row = *ajtmp++ ; 491 while (row < i) { 492 pc = rtmp + row; 493 if (*pc != 0.0) { 494 pv = b->a + diag_offset[row] ; 495 pj = b->j + diag_offset[row] + 1; 496 multiplier = *pc / *pv++; 497 *pc = multiplier; 498 nz = ai[row+1] - diag_offset[row] - 1; 499 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 500 PetscLogFlops(2*nz); 501 } 502 row = *ajtmp++; 503 } 504 /* finished row so stick it into b->a */ 505 pv = b->a + ai[i] ; 506 pj = b->j + ai[i] ; 507 nz = ai[i+1] - ai[i]; 508 diag = diag_offset[i] - ai[i]; 509 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 510 rs = 0.0; 511 for (j=0; j<nz; j++) { 512 pv[j] = rtmps[pj[j]]; 513 if (j != diag) rs += PetscAbsScalar(pv[j]); 514 } 515 #define MAX_NSHIFT 5 516 if (PetscRealPart(pv[diag]) <= zeropivot*rs && b->lu_shift) { 517 if (nshift>MAX_NSHIFT) { 518 SETERRQ(1,"Unable to determine shift to enforce positive definite preconditioner"); 519 } else if (nshift==MAX_NSHIFT) { 520 shift_fraction = shift_hi; 521 lushift = PETSC_FALSE; 522 } else { 523 shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 524 lushift = PETSC_TRUE; 525 } 526 shift_amount = shift_fraction * shift_top; 527 nshift++; 528 break; 529 } 530 if (PetscAbsScalar(pv[diag]) <= zeropivot*rs) { 531 if (damping) { 532 if (ndamp) damping *= 2.0; 533 damp = PETSC_TRUE; 534 ndamp++; 535 break; 536 } else { 537 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 538 } 539 } 540 } 541 if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) { 542 /* 543 * if no shift in this attempt & shifting & started shifting & can refine, 544 * then try lower shift 545 */ 546 shift_hi = shift_fraction; 547 shift_fraction = (shift_hi+shift_lo)/2.; 548 shift_amount = shift_fraction * shift_top; 549 lushift = PETSC_TRUE; 550 nshift++; 551 } 552 } while (damp || lushift); 553 554 /* invert diagonal entries for simplier triangular solves */ 555 for (i=0; i<n; i++) { 556 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 557 } 558 559 ierr = PetscFree(rtmp);CHKERRQ(ierr); 560 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 561 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 562 C->factor = FACTOR_LU; 563 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 564 C->assembled = PETSC_TRUE; 565 PetscLogFlops(C->n); 566 if (ndamp) { 567 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %g\n",ndamp,damping); 568 } 569 if (nshift) { 570 b->lu_shift_fraction = shift_fraction; 571 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %d\n",shift_fraction,shift_top,nshift); 572 } 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 578 int MatUsePETSc_SeqAIJ(Mat A) 579 { 580 PetscFunctionBegin; 581 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 582 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 583 PetscFunctionReturn(0); 584 } 585 586 587 /* ----------------------------------------------------------- */ 588 #undef __FUNCT__ 589 #define __FUNCT__ "MatLUFactor_SeqAIJ" 590 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 591 { 592 int ierr; 593 Mat C; 594 595 PetscFunctionBegin; 596 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 597 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 598 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 599 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 600 PetscFunctionReturn(0); 601 } 602 /* ----------------------------------------------------------- */ 603 #undef __FUNCT__ 604 #define __FUNCT__ "MatSolve_SeqAIJ" 605 int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 606 { 607 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 608 IS iscol = a->col,isrow = a->row; 609 int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 610 int nz,*rout,*cout; 611 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 612 613 PetscFunctionBegin; 614 if (!n) PetscFunctionReturn(0); 615 616 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 617 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 618 tmp = a->solve_work; 619 620 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 621 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 622 623 /* forward solve the lower triangular */ 624 tmp[0] = b[*r++]; 625 tmps = tmp; 626 for (i=1; i<n; i++) { 627 v = aa + ai[i] ; 628 vi = aj + ai[i] ; 629 nz = a->diag[i] - ai[i]; 630 sum = b[*r++]; 631 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 632 tmp[i] = sum; 633 } 634 635 /* backward solve the upper triangular */ 636 for (i=n-1; i>=0; i--){ 637 v = aa + a->diag[i] + 1; 638 vi = aj + a->diag[i] + 1; 639 nz = ai[i+1] - a->diag[i] - 1; 640 sum = tmp[i]; 641 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 642 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 643 } 644 645 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 646 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 647 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 648 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 649 PetscLogFlops(2*a->nz - A->n); 650 PetscFunctionReturn(0); 651 } 652 653 /* ----------------------------------------------------------- */ 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 656 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 657 { 658 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 659 int n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr; 660 PetscScalar *x,*b,*aa = a->a; 661 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 662 int adiag_i,i,*vi,nz,ai_i; 663 PetscScalar *v,sum; 664 #endif 665 666 PetscFunctionBegin; 667 if (!n) PetscFunctionReturn(0); 668 669 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 670 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 671 672 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 673 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 674 #else 675 /* forward solve the lower triangular */ 676 x[0] = b[0]; 677 for (i=1; i<n; i++) { 678 ai_i = ai[i]; 679 v = aa + ai_i; 680 vi = aj + ai_i; 681 nz = adiag[i] - ai_i; 682 sum = b[i]; 683 while (nz--) sum -= *v++ * x[*vi++]; 684 x[i] = sum; 685 } 686 687 /* backward solve the upper triangular */ 688 for (i=n-1; i>=0; i--){ 689 adiag_i = adiag[i]; 690 v = aa + adiag_i + 1; 691 vi = aj + adiag_i + 1; 692 nz = ai[i+1] - adiag_i - 1; 693 sum = x[i]; 694 while (nz--) sum -= *v++ * x[*vi++]; 695 x[i] = sum*aa[adiag_i]; 696 } 697 #endif 698 PetscLogFlops(2*a->nz - A->n); 699 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 700 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 706 int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 707 { 708 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 709 IS iscol = a->col,isrow = a->row; 710 int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 711 int nz,*rout,*cout; 712 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 713 714 PetscFunctionBegin; 715 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 716 717 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 718 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 719 tmp = a->solve_work; 720 721 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 722 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 723 724 /* forward solve the lower triangular */ 725 tmp[0] = b[*r++]; 726 for (i=1; i<n; i++) { 727 v = aa + ai[i] ; 728 vi = aj + ai[i] ; 729 nz = a->diag[i] - ai[i]; 730 sum = b[*r++]; 731 while (nz--) sum -= *v++ * tmp[*vi++ ]; 732 tmp[i] = sum; 733 } 734 735 /* backward solve the upper triangular */ 736 for (i=n-1; i>=0; i--){ 737 v = aa + a->diag[i] + 1; 738 vi = aj + a->diag[i] + 1; 739 nz = ai[i+1] - a->diag[i] - 1; 740 sum = tmp[i]; 741 while (nz--) sum -= *v++ * tmp[*vi++ ]; 742 tmp[i] = sum*aa[a->diag[i]]; 743 x[*c--] += tmp[i]; 744 } 745 746 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 747 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 748 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 749 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 750 PetscLogFlops(2*a->nz); 751 752 PetscFunctionReturn(0); 753 } 754 /* -------------------------------------------------------------------*/ 755 #undef __FUNCT__ 756 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 757 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 758 { 759 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 760 IS iscol = a->col,isrow = a->row; 761 int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 762 int nz,*rout,*cout,*diag = a->diag; 763 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 764 765 PetscFunctionBegin; 766 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 767 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 768 tmp = a->solve_work; 769 770 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 771 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 772 773 /* copy the b into temp work space according to permutation */ 774 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 775 776 /* forward solve the U^T */ 777 for (i=0; i<n; i++) { 778 v = aa + diag[i] ; 779 vi = aj + diag[i] + 1; 780 nz = ai[i+1] - diag[i] - 1; 781 s1 = tmp[i]; 782 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 783 while (nz--) { 784 tmp[*vi++ ] -= (*v++)*s1; 785 } 786 tmp[i] = s1; 787 } 788 789 /* backward solve the L^T */ 790 for (i=n-1; i>=0; i--){ 791 v = aa + diag[i] - 1 ; 792 vi = aj + diag[i] - 1 ; 793 nz = diag[i] - ai[i]; 794 s1 = tmp[i]; 795 while (nz--) { 796 tmp[*vi-- ] -= (*v--)*s1; 797 } 798 } 799 800 /* copy tmp into x according to permutation */ 801 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 802 803 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 804 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 805 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 806 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 807 808 PetscLogFlops(2*a->nz-A->n); 809 PetscFunctionReturn(0); 810 } 811 812 #undef __FUNCT__ 813 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 814 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 815 { 816 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 817 IS iscol = a->col,isrow = a->row; 818 int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 819 int nz,*rout,*cout,*diag = a->diag; 820 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 821 822 PetscFunctionBegin; 823 if (zz != xx) VecCopy(zz,xx); 824 825 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 826 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 827 tmp = a->solve_work; 828 829 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 830 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 831 832 /* copy the b into temp work space according to permutation */ 833 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 834 835 /* forward solve the U^T */ 836 for (i=0; i<n; i++) { 837 v = aa + diag[i] ; 838 vi = aj + diag[i] + 1; 839 nz = ai[i+1] - diag[i] - 1; 840 tmp[i] *= *v++; 841 while (nz--) { 842 tmp[*vi++ ] -= (*v++)*tmp[i]; 843 } 844 } 845 846 /* backward solve the L^T */ 847 for (i=n-1; i>=0; i--){ 848 v = aa + diag[i] - 1 ; 849 vi = aj + diag[i] - 1 ; 850 nz = diag[i] - ai[i]; 851 while (nz--) { 852 tmp[*vi-- ] -= (*v--)*tmp[i]; 853 } 854 } 855 856 /* copy tmp into x according to permutation */ 857 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 858 859 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 860 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 861 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 862 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 863 864 PetscLogFlops(2*a->nz); 865 PetscFunctionReturn(0); 866 } 867 /* ----------------------------------------------------------------*/ 868 EXTERN int MatMissingDiagonal_SeqAIJ(Mat); 869 870 #undef __FUNCT__ 871 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 872 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 873 { 874 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 875 IS isicol; 876 int *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j; 877 int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 878 int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0; 879 int incrlev,nnz,i,levels,diagonal_fill; 880 PetscTruth col_identity,row_identity; 881 PetscReal f; 882 883 PetscFunctionBegin; 884 f = info->fill; 885 levels = (int)info->levels; 886 diagonal_fill = (int)info->diagonal_fill; 887 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 888 889 /* special case that simply copies fill pattern */ 890 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 891 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 892 if (!levels && row_identity && col_identity) { 893 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 894 (*fact)->factor = FACTOR_LU; 895 b = (Mat_SeqAIJ*)(*fact)->data; 896 if (!b->diag) { 897 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 898 } 899 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 900 b->row = isrow; 901 b->col = iscol; 902 b->icol = isicol; 903 b->lu_damping = info->damping; 904 b->lu_zeropivot = info->zeropivot; 905 b->lu_shift = info->shift; 906 b->lu_shift_fraction= info->shift_fraction; 907 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 908 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 909 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 910 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 911 PetscFunctionReturn(0); 912 } 913 914 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 915 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 916 917 /* get new row pointers */ 918 ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 919 ainew[0] = 0; 920 /* don't know how many column pointers are needed so estimate */ 921 jmax = (int)(f*(ai[n]+1)); 922 ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 923 /* ajfill is level of fill for each fill entry */ 924 ierr = PetscMalloc((jmax)*sizeof(int),&ajfill);CHKERRQ(ierr); 925 /* fill is a linked list of nonzeros in active row */ 926 ierr = PetscMalloc((n+1)*sizeof(int),&fill);CHKERRQ(ierr); 927 /* im is level for each filled value */ 928 ierr = PetscMalloc((n+1)*sizeof(int),&im);CHKERRQ(ierr); 929 /* dloc is location of diagonal in factor */ 930 ierr = PetscMalloc((n+1)*sizeof(int),&dloc);CHKERRQ(ierr); 931 dloc[0] = 0; 932 for (prow=0; prow<n; prow++) { 933 934 /* copy current row into linked list */ 935 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 936 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 937 xi = aj + ai[r[prow]] ; 938 fill[n] = n; 939 fill[prow] = -1; /* marker to indicate if diagonal exists */ 940 while (nz--) { 941 fm = n; 942 idx = ic[*xi++ ]; 943 do { 944 m = fm; 945 fm = fill[m]; 946 } while (fm < idx); 947 fill[m] = idx; 948 fill[idx] = fm; 949 im[idx] = 0; 950 } 951 952 /* make sure diagonal entry is included */ 953 if (diagonal_fill && fill[prow] == -1) { 954 fm = n; 955 while (fill[fm] < prow) fm = fill[fm]; 956 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 957 fill[fm] = prow; 958 im[prow] = 0; 959 nzf++; 960 dcount++; 961 } 962 963 nzi = 0; 964 row = fill[n]; 965 while (row < prow) { 966 incrlev = im[row] + 1; 967 nz = dloc[row]; 968 xi = ajnew + ainew[row] + nz + 1; 969 flev = ajfill + ainew[row] + nz + 1; 970 nnz = ainew[row+1] - ainew[row] - nz - 1; 971 fm = row; 972 while (nnz-- > 0) { 973 idx = *xi++ ; 974 if (*flev + incrlev > levels) { 975 flev++; 976 continue; 977 } 978 do { 979 m = fm; 980 fm = fill[m]; 981 } while (fm < idx); 982 if (fm != idx) { 983 im[idx] = *flev + incrlev; 984 fill[m] = idx; 985 fill[idx] = fm; 986 fm = idx; 987 nzf++; 988 } else { 989 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 990 } 991 flev++; 992 } 993 row = fill[row]; 994 nzi++; 995 } 996 /* copy new filled row into permanent storage */ 997 ainew[prow+1] = ainew[prow] + nzf; 998 if (ainew[prow+1] > jmax) { 999 1000 /* estimate how much additional space we will need */ 1001 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1002 /* just double the memory each time */ 1003 /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1004 int maxadd = jmax; 1005 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1006 jmax += maxadd; 1007 1008 /* allocate a longer ajnew and ajfill */ 1009 ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 1010 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(int));CHKERRQ(ierr); 1011 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1012 ajnew = xi; 1013 ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 1014 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(int));CHKERRQ(ierr); 1015 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1016 ajfill = xi; 1017 realloc++; /* count how many times we realloc */ 1018 } 1019 xi = ajnew + ainew[prow] ; 1020 flev = ajfill + ainew[prow] ; 1021 dloc[prow] = nzi; 1022 fm = fill[n]; 1023 while (nzf--) { 1024 *xi++ = fm ; 1025 *flev++ = im[fm]; 1026 fm = fill[fm]; 1027 } 1028 /* make sure row has diagonal entry */ 1029 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1030 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrix\n\ 1031 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1032 } 1033 } 1034 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1035 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1036 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1037 ierr = PetscFree(fill);CHKERRQ(ierr); 1038 ierr = PetscFree(im);CHKERRQ(ierr); 1039 1040 { 1041 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1042 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1043 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1044 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1045 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1046 if (diagonal_fill) { 1047 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount); 1048 } 1049 } 1050 1051 /* put together the new matrix */ 1052 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1053 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1054 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1055 PetscLogObjectParent(*fact,isicol); 1056 b = (Mat_SeqAIJ*)(*fact)->data; 1057 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1058 b->singlemalloc = PETSC_FALSE; 1059 len = (ainew[n] )*sizeof(PetscScalar); 1060 /* the next line frees the default space generated by the Create() */ 1061 ierr = PetscFree(b->a);CHKERRQ(ierr); 1062 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1063 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1064 b->j = ajnew; 1065 b->i = ainew; 1066 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1067 b->diag = dloc; 1068 b->ilen = 0; 1069 b->imax = 0; 1070 b->row = isrow; 1071 b->col = iscol; 1072 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1073 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1074 b->icol = isicol; 1075 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1076 /* In b structure: Free imax, ilen, old a, old j. 1077 Allocate dloc, solve_work, new a, new j */ 1078 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(int)+sizeof(PetscScalar))); 1079 b->maxnz = b->nz = ainew[n] ; 1080 b->lu_damping = info->damping; 1081 b->lu_shift = info->shift; 1082 b->lu_shift_fraction = info->shift_fraction; 1083 b->lu_zeropivot = info->zeropivot; 1084 (*fact)->factor = FACTOR_LU; 1085 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1086 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1087 1088 (*fact)->info.factor_mallocs = realloc; 1089 (*fact)->info.fill_ratio_given = f; 1090 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 #include "src/mat/impls/sbaij/seq/sbaij.h" 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1097 int MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact) 1098 { 1099 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1100 int ierr; 1101 1102 PetscFunctionBegin; 1103 if (!a->sbaijMat){ 1104 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1105 } 1106 1107 ierr = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(a->sbaijMat,fact);CHKERRQ(ierr); 1108 ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 1109 a->sbaijMat = PETSC_NULL; 1110 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1116 int MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1117 { 1118 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1119 int ierr; 1120 PetscTruth perm_identity; 1121 1122 PetscFunctionBegin; 1123 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1124 if (!perm_identity){ 1125 SETERRQ(1,"Non-identity permutation is not supported yet"); 1126 } 1127 if (!a->sbaijMat){ 1128 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1129 } 1130 1131 ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1132 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1133 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNCT__ 1138 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1139 int MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1140 { 1141 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1142 int ierr; 1143 PetscTruth perm_identity; 1144 1145 PetscFunctionBegin; 1146 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1147 if (!perm_identity){ 1148 SETERRQ(1,"Non-identity permutation is not supported yet"); 1149 } 1150 if (!a->sbaijMat){ 1151 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1152 } 1153 1154 ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1155 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1156 1157 PetscFunctionReturn(0); 1158 } 1159