1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 8 int MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 9 { 10 PetscFunctionBegin; 11 12 SETERRQ(PETSC_ERR_SUP,"Code not written"); 13 #if !defined(PETSC_USE_DEBUG) 14 PetscFunctionReturn(0); 15 #endif 16 } 17 18 19 EXTERN int MatMarkDiagonal_SeqAIJ(Mat); 20 EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth); 21 22 EXTERN int SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*); 23 EXTERN int SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*); 24 EXTERN int SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 28 /* ------------------------------------------------------------ 29 30 This interface was contribed by Tony Caola 31 32 This routine is an interface to the pivoting drop-tolerance 33 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 34 SPARSEKIT2. 35 36 The SPARSEKIT2 routines used here are covered by the GNU 37 copyright; see the file gnu in this directory. 38 39 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 40 help in getting this routine ironed out. 41 42 The major drawback to this routine is that if info->fill is 43 not large enough it fails rather than allocating more space; 44 this can be fixed by hacking/improving the f2c version of 45 Yousef Saad's code. 46 47 ------------------------------------------------------------ 48 */ 49 int MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 50 { 51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 52 PetscFunctionBegin; 53 SETERRQ(1,"This distribution does not include GNU Copyright code\n\ 54 You can obtain the drop tolerance routines by installing PETSc from\n\ 55 www.mcs.anl.gov/petsc\n"); 56 #else 57 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 58 IS iscolf,isicol,isirow; 59 PetscTruth reorder; 60 int *c,*r,*ic,ierr,i,n = A->m; 61 int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 62 int *ordcol,*iwk,*iperm,*jw; 63 int jmax,lfill,job,*o_i,*o_j; 64 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 65 PetscReal permtol,af; 66 67 PetscFunctionBegin; 68 69 if (info->dt == PETSC_DEFAULT) info->dt = .005; 70 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax); 71 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 72 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 73 lfill = (int)(info->dtcount/2.0); 74 jmax = (int)(info->fill*a->nz); 75 permtol = info->dtcol; 76 77 78 /* ------------------------------------------------------------ 79 If reorder=.TRUE., then the original matrix has to be 80 reordered to reflect the user selected ordering scheme, and 81 then de-reordered so it is in it's original format. 82 Because Saad's dperm() is NOT in place, we have to copy 83 the original matrix and allocate more storage. . . 84 ------------------------------------------------------------ 85 */ 86 87 /* set reorder to true if either isrow or iscol is not identity */ 88 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 89 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 90 reorder = PetscNot(reorder); 91 92 93 /* storage for ilu factor */ 94 ierr = PetscMalloc((n+1)*sizeof(int),&new_i);CHKERRQ(ierr); 95 ierr = PetscMalloc(jmax*sizeof(int),&new_j);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 97 ierr = PetscMalloc(n*sizeof(int),&ordcol);CHKERRQ(ierr); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 for (i=old_i[0];i<old_i[n];i++) { 104 old_j[i]++; 105 } 106 for(i=0;i<n+1;i++) { 107 old_i[i]++; 108 }; 109 110 111 if (reorder) { 112 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 113 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 114 for(i=0;i<n;i++) { 115 r[i] = r[i]+1; 116 c[i] = c[i]+1; 117 } 118 ierr = PetscMalloc((n+1)*sizeof(int),&old_i2);CHKERRQ(ierr); 119 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 121 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 122 for (i=0;i<n;i++) { 123 r[i] = r[i]-1; 124 c[i] = c[i]-1; 125 } 126 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 127 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 128 o_a = old_a2; 129 o_j = old_j2; 130 o_i = old_i2; 131 } else { 132 o_a = old_a; 133 o_j = old_j; 134 o_i = old_i; 135 } 136 137 /* ------------------------------------------------------------ 138 Call Saad's ilutp() routine to generate the factorization 139 ------------------------------------------------------------ 140 */ 141 142 ierr = PetscMalloc(2*n*sizeof(int),&iperm);CHKERRQ(ierr); 143 ierr = PetscMalloc(2*n*sizeof(int),&jw);CHKERRQ(ierr); 144 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 145 146 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 147 if (ierr) { 148 switch (ierr) { 149 case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 150 case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 151 case -5: SETERRQ(1,"ilutp(), zero row encountered"); 152 case -1: SETERRQ(1,"ilutp(), input matrix may be wrong"); 153 case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax); 154 default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr); 155 } 156 } 157 158 ierr = PetscFree(w);CHKERRQ(ierr); 159 ierr = PetscFree(jw);CHKERRQ(ierr); 160 161 /* ------------------------------------------------------------ 162 Saad's routine gives the result in Modified Sparse Row (msr) 163 Convert to Compressed Sparse Row format (csr) 164 ------------------------------------------------------------ 165 */ 166 167 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 168 ierr = PetscMalloc((n+1)*sizeof(int),&iwk);CHKERRQ(ierr); 169 170 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 171 172 ierr = PetscFree(iwk);CHKERRQ(ierr); 173 ierr = PetscFree(wk);CHKERRQ(ierr); 174 175 if (reorder) { 176 ierr = PetscFree(old_a2);CHKERRQ(ierr); 177 ierr = PetscFree(old_j2);CHKERRQ(ierr); 178 ierr = PetscFree(old_i2);CHKERRQ(ierr); 179 } else { 180 /* fix permutation of old_j that the factorization introduced */ 181 for (i=old_i[0]; i<old_i[n]; i++) { 182 old_j[i-1] = iperm[old_j[i-1]-1]; 183 } 184 } 185 186 /* get rid of the shift to indices starting at 1 */ 187 for (i=0; i<n+1; i++) { 188 old_i[i]--; 189 } 190 for (i=old_i[0];i<old_i[n];i++) { 191 old_j[i]--; 192 } 193 194 /* Make the factored matrix 0-based */ 195 for (i=0; i<n+1; i++) { 196 new_i[i]--; 197 } 198 for (i=new_i[0];i<new_i[n];i++) { 199 new_j[i]--; 200 } 201 202 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 203 /*-- permute the right-hand-side and solution vectors --*/ 204 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 205 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 206 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 207 for(i=0; i<n; i++) { 208 ordcol[i] = ic[iperm[i]-1]; 209 }; 210 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 211 ierr = ISDestroy(isicol);CHKERRQ(ierr); 212 213 ierr = PetscFree(iperm);CHKERRQ(ierr); 214 215 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 216 ierr = PetscFree(ordcol);CHKERRQ(ierr); 217 218 /*----- put together the new matrix -----*/ 219 220 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 221 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 222 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 223 (*fact)->factor = FACTOR_LU; 224 (*fact)->assembled = PETSC_TRUE; 225 226 b = (Mat_SeqAIJ*)(*fact)->data; 227 ierr = PetscFree(b->imax);CHKERRQ(ierr); 228 b->sorted = PETSC_FALSE; 229 b->singlemalloc = PETSC_FALSE; 230 /* the next line frees the default space generated by the MatCreate() */ 231 ierr = PetscFree(b->a);CHKERRQ(ierr); 232 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 247 248 /* check out for identical nodes. If found, use inode functions */ 249 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 250 251 af = ((double)b->nz)/((double)a->nz) + .001; 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 /* 262 Factorization code for AIJ format. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 266 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 267 { 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 269 IS isicol; 270 int *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j; 271 int *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 272 int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 273 PetscReal f; 274 275 PetscFunctionBegin; 276 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 277 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 278 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 279 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 280 281 /* get new row pointers */ 282 ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 283 ainew[0] = 0; 284 /* don't know how many column pointers are needed so estimate */ 285 f = info->fill; 286 jmax = (int)(f*ai[n]+1); 287 ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 288 /* fill is a linked list of nonzeros in active row */ 289 ierr = PetscMalloc((2*n+1)*sizeof(int),&fill);CHKERRQ(ierr); 290 im = fill + n + 1; 291 /* idnew is location of diagonal in factor */ 292 ierr = PetscMalloc((n+1)*sizeof(int),&idnew);CHKERRQ(ierr); 293 idnew[0] = 0; 294 295 for (i=0; i<n; i++) { 296 /* first copy previous fill into linked list */ 297 nnz = nz = ai[r[i]+1] - ai[r[i]]; 298 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 299 ajtmp = aj + ai[r[i]]; 300 fill[n] = n; 301 while (nz--) { 302 fm = n; 303 idx = ic[*ajtmp++]; 304 do { 305 m = fm; 306 fm = fill[m]; 307 } while (fm < idx); 308 fill[m] = idx; 309 fill[idx] = fm; 310 } 311 row = fill[n]; 312 while (row < i) { 313 ajtmp = ajnew + idnew[row] + 1; 314 nzbd = 1 + idnew[row] - ainew[row]; 315 nz = im[row] - nzbd; 316 fm = row; 317 while (nz-- > 0) { 318 idx = *ajtmp++ ; 319 nzbd++; 320 if (idx == i) im[row] = nzbd; 321 do { 322 m = fm; 323 fm = fill[m]; 324 } while (fm < idx); 325 if (fm != idx) { 326 fill[m] = idx; 327 fill[idx] = fm; 328 fm = idx; 329 nnz++; 330 } 331 } 332 row = fill[row]; 333 } 334 /* copy new filled row into permanent storage */ 335 ainew[i+1] = ainew[i] + nnz; 336 if (ainew[i+1] > jmax) { 337 338 /* estimate how much additional space we will need */ 339 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 340 /* just double the memory each time */ 341 int maxadd = jmax; 342 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 343 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 344 jmax += maxadd; 345 346 /* allocate a longer ajnew */ 347 ierr = PetscMalloc(jmax*sizeof(int),&ajtmp);CHKERRQ(ierr); 348 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(int));CHKERRQ(ierr); 349 ierr = PetscFree(ajnew);CHKERRQ(ierr); 350 ajnew = ajtmp; 351 realloc++; /* count how many times we realloc */ 352 } 353 ajtmp = ajnew + ainew[i]; 354 fm = fill[n]; 355 nzi = 0; 356 im[i] = nnz; 357 while (nnz--) { 358 if (fm < i) nzi++; 359 *ajtmp++ = fm ; 360 fm = fill[fm]; 361 } 362 idnew[i] = ainew[i] + nzi; 363 } 364 if (ai[n] != 0) { 365 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 366 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 370 } else { 371 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 372 } 373 374 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 375 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 376 377 ierr = PetscFree(fill);CHKERRQ(ierr); 378 379 /* put together the new matrix */ 380 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 381 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 382 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 383 PetscLogObjectParent(*B,isicol); 384 b = (Mat_SeqAIJ*)(*B)->data; 385 ierr = PetscFree(b->imax);CHKERRQ(ierr); 386 b->singlemalloc = PETSC_FALSE; 387 /* the next line frees the default space generated by the Create() */ 388 ierr = PetscFree(b->a);CHKERRQ(ierr); 389 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 390 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 391 b->j = ajnew; 392 b->i = ainew; 393 b->diag = idnew; 394 b->ilen = 0; 395 b->imax = 0; 396 b->row = isrow; 397 b->col = iscol; 398 b->lu_damping = info->damping; 399 b->lu_zeropivot = info->zeropivot; 400 b->lu_shift = info->shift; 401 b->lu_shift_fraction = info->shift_fraction; 402 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 403 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 404 b->icol = isicol; 405 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 406 /* In b structure: Free imax, ilen, old a, old j. 407 Allocate idnew, solve_work, new a, new j */ 408 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(int)+sizeof(PetscScalar))); 409 b->maxnz = b->nz = ainew[n] ; 410 411 (*B)->factor = FACTOR_LU; 412 (*B)->info.factor_mallocs = realloc; 413 (*B)->info.fill_ratio_given = f; 414 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 415 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 416 417 if (ai[n] != 0) { 418 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 419 } else { 420 (*B)->info.fill_ratio_needed = 0.0; 421 } 422 PetscFunctionReturn(0); 423 } 424 /* ----------------------------------------------------------- */ 425 EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth); 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 429 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 430 { 431 Mat C = *B; 432 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 433 IS isrow = b->row,isicol = b->icol; 434 int *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j; 435 int *ajtmpold,*ajtmp,nz,row,*ics; 436 int *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0; 437 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 438 PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d; 439 PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.; 440 PetscTruth damp,lushift; 441 442 PetscFunctionBegin; 443 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 444 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 445 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 446 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 447 rtmps = rtmp; ics = ic; 448 449 if (!a->diag) { 450 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 451 } 452 453 if (b->lu_shift) { /* set max shift */ 454 int *aai = a->i,*ddiag = a->diag; 455 shift_top = 0; 456 for (i=0; i<n; i++) { 457 d = PetscAbsScalar((a->a)[ddiag[i]]); 458 /* calculate amt of shift needed for this row */ 459 if (d<=0) { 460 row_shift = 0; 461 } else { 462 row_shift = -2*d; 463 } 464 v = a->a+aai[i]; 465 for (j=0; j<aai[i+1]-aai[i]; j++) 466 row_shift += PetscAbsScalar(v[j]); 467 if (row_shift>shift_top) shift_top = row_shift; 468 } 469 } 470 471 shift_fraction = 0; shift_amount = 0; 472 do { 473 damp = PETSC_FALSE; 474 lushift = PETSC_FALSE; 475 for (i=0; i<n; i++) { 476 nz = ai[i+1] - ai[i]; 477 ajtmp = aj + ai[i]; 478 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 479 480 /* load in initial (unfactored row) */ 481 nz = a->i[r[i]+1] - a->i[r[i]]; 482 ajtmpold = a->j + a->i[r[i]]; 483 v = a->a + a->i[r[i]]; 484 for (j=0; j<nz; j++) { 485 rtmp[ics[ajtmpold[j]]] = v[j]; 486 } 487 rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */ 488 489 row = *ajtmp++ ; 490 while (row < i) { 491 pc = rtmp + row; 492 if (*pc != 0.0) { 493 pv = b->a + diag_offset[row]; 494 pj = b->j + diag_offset[row] + 1; 495 multiplier = *pc / *pv++; 496 *pc = multiplier; 497 nz = ai[row+1] - diag_offset[row] - 1; 498 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 499 PetscLogFlops(2*nz); 500 } 501 row = *ajtmp++; 502 } 503 /* finished row so stick it into b->a */ 504 pv = b->a + ai[i] ; 505 pj = b->j + ai[i] ; 506 nz = ai[i+1] - ai[i]; 507 diag = diag_offset[i] - ai[i]; 508 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 509 rs = 0.0; 510 for (j=0; j<nz; j++) { 511 pv[j] = rtmps[pj[j]]; 512 if (j != diag) rs += PetscAbsScalar(pv[j]); 513 } 514 #define MAX_NSHIFT 5 515 if (PetscRealPart(pv[diag]) <= zeropivot*rs && b->lu_shift) { 516 if (nshift>MAX_NSHIFT) { 517 SETERRQ(1,"Unable to determine shift to enforce positive definite preconditioner"); 518 } else if (nshift==MAX_NSHIFT) { 519 shift_fraction = shift_hi; 520 lushift = PETSC_FALSE; 521 } else { 522 shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 523 lushift = PETSC_TRUE; 524 } 525 shift_amount = shift_fraction * shift_top; 526 nshift++; 527 break; 528 } 529 if (PetscAbsScalar(pv[diag]) <= zeropivot*rs) { 530 if (damping) { 531 if (ndamp) damping *= 2.0; 532 damp = PETSC_TRUE; 533 ndamp++; 534 break; 535 } else { 536 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 537 } 538 } 539 } 540 if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) { 541 /* 542 * if no shift in this attempt & shifting & started shifting & can refine, 543 * then try lower shift 544 */ 545 shift_hi = shift_fraction; 546 shift_fraction = (shift_hi+shift_lo)/2.; 547 shift_amount = shift_fraction * shift_top; 548 lushift = PETSC_TRUE; 549 nshift++; 550 } 551 } while (damp || lushift); 552 553 /* invert diagonal entries for simplier triangular solves */ 554 for (i=0; i<n; i++) { 555 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 556 } 557 558 ierr = PetscFree(rtmp);CHKERRQ(ierr); 559 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 560 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 561 C->factor = FACTOR_LU; 562 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 563 C->assembled = PETSC_TRUE; 564 PetscLogFlops(C->n); 565 if (ndamp) { 566 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %g\n",ndamp,damping); 567 } 568 if (nshift) { 569 b->lu_shift_fraction = shift_fraction; 570 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %d\n",shift_fraction,shift_top,nshift); 571 } 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 577 int MatUsePETSc_SeqAIJ(Mat A) 578 { 579 PetscFunctionBegin; 580 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 581 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 582 PetscFunctionReturn(0); 583 } 584 585 586 /* ----------------------------------------------------------- */ 587 #undef __FUNCT__ 588 #define __FUNCT__ "MatLUFactor_SeqAIJ" 589 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 590 { 591 int ierr; 592 Mat C; 593 594 PetscFunctionBegin; 595 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 596 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 597 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 598 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 599 PetscFunctionReturn(0); 600 } 601 /* ----------------------------------------------------------- */ 602 #undef __FUNCT__ 603 #define __FUNCT__ "MatSolve_SeqAIJ" 604 int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 605 { 606 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 607 IS iscol = a->col,isrow = a->row; 608 int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 609 int nz,*rout,*cout; 610 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 611 612 PetscFunctionBegin; 613 if (!n) PetscFunctionReturn(0); 614 615 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 616 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 617 tmp = a->solve_work; 618 619 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 620 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 621 622 /* forward solve the lower triangular */ 623 tmp[0] = b[*r++]; 624 tmps = tmp; 625 for (i=1; i<n; i++) { 626 v = aa + ai[i] ; 627 vi = aj + ai[i] ; 628 nz = a->diag[i] - ai[i]; 629 sum = b[*r++]; 630 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 631 tmp[i] = sum; 632 } 633 634 /* backward solve the upper triangular */ 635 for (i=n-1; i>=0; i--){ 636 v = aa + a->diag[i] + 1; 637 vi = aj + a->diag[i] + 1; 638 nz = ai[i+1] - a->diag[i] - 1; 639 sum = tmp[i]; 640 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 641 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 642 } 643 644 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 645 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 646 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 647 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 648 PetscLogFlops(2*a->nz - A->n); 649 PetscFunctionReturn(0); 650 } 651 652 /* ----------------------------------------------------------- */ 653 #undef __FUNCT__ 654 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 655 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 656 { 657 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 658 int n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr; 659 PetscScalar *x,*b,*aa = a->a; 660 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 661 int adiag_i,i,*vi,nz,ai_i; 662 PetscScalar *v,sum; 663 #endif 664 665 PetscFunctionBegin; 666 if (!n) PetscFunctionReturn(0); 667 668 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 669 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 670 671 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 672 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 673 #else 674 /* forward solve the lower triangular */ 675 x[0] = b[0]; 676 for (i=1; i<n; i++) { 677 ai_i = ai[i]; 678 v = aa + ai_i; 679 vi = aj + ai_i; 680 nz = adiag[i] - ai_i; 681 sum = b[i]; 682 while (nz--) sum -= *v++ * x[*vi++]; 683 x[i] = sum; 684 } 685 686 /* backward solve the upper triangular */ 687 for (i=n-1; i>=0; i--){ 688 adiag_i = adiag[i]; 689 v = aa + adiag_i + 1; 690 vi = aj + adiag_i + 1; 691 nz = ai[i+1] - adiag_i - 1; 692 sum = x[i]; 693 while (nz--) sum -= *v++ * x[*vi++]; 694 x[i] = sum*aa[adiag_i]; 695 } 696 #endif 697 PetscLogFlops(2*a->nz - A->n); 698 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 699 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 705 int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 706 { 707 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 708 IS iscol = a->col,isrow = a->row; 709 int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 710 int nz,*rout,*cout; 711 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 712 713 PetscFunctionBegin; 714 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 715 716 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 717 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 718 tmp = a->solve_work; 719 720 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 721 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 722 723 /* forward solve the lower triangular */ 724 tmp[0] = b[*r++]; 725 for (i=1; i<n; i++) { 726 v = aa + ai[i] ; 727 vi = aj + ai[i] ; 728 nz = a->diag[i] - ai[i]; 729 sum = b[*r++]; 730 while (nz--) sum -= *v++ * tmp[*vi++ ]; 731 tmp[i] = sum; 732 } 733 734 /* backward solve the upper triangular */ 735 for (i=n-1; i>=0; i--){ 736 v = aa + a->diag[i] + 1; 737 vi = aj + a->diag[i] + 1; 738 nz = ai[i+1] - a->diag[i] - 1; 739 sum = tmp[i]; 740 while (nz--) sum -= *v++ * tmp[*vi++ ]; 741 tmp[i] = sum*aa[a->diag[i]]; 742 x[*c--] += tmp[i]; 743 } 744 745 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 746 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 747 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 748 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 749 PetscLogFlops(2*a->nz); 750 751 PetscFunctionReturn(0); 752 } 753 /* -------------------------------------------------------------------*/ 754 #undef __FUNCT__ 755 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 756 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 757 { 758 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 759 IS iscol = a->col,isrow = a->row; 760 int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 761 int nz,*rout,*cout,*diag = a->diag; 762 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 763 764 PetscFunctionBegin; 765 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 766 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 767 tmp = a->solve_work; 768 769 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 770 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 771 772 /* copy the b into temp work space according to permutation */ 773 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 774 775 /* forward solve the U^T */ 776 for (i=0; i<n; i++) { 777 v = aa + diag[i] ; 778 vi = aj + diag[i] + 1; 779 nz = ai[i+1] - diag[i] - 1; 780 s1 = tmp[i]; 781 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 782 while (nz--) { 783 tmp[*vi++ ] -= (*v++)*s1; 784 } 785 tmp[i] = s1; 786 } 787 788 /* backward solve the L^T */ 789 for (i=n-1; i>=0; i--){ 790 v = aa + diag[i] - 1 ; 791 vi = aj + diag[i] - 1 ; 792 nz = diag[i] - ai[i]; 793 s1 = tmp[i]; 794 while (nz--) { 795 tmp[*vi-- ] -= (*v--)*s1; 796 } 797 } 798 799 /* copy tmp into x according to permutation */ 800 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 801 802 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 803 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 804 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 805 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 806 807 PetscLogFlops(2*a->nz-A->n); 808 PetscFunctionReturn(0); 809 } 810 811 #undef __FUNCT__ 812 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 813 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 814 { 815 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 816 IS iscol = a->col,isrow = a->row; 817 int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 818 int nz,*rout,*cout,*diag = a->diag; 819 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 820 821 PetscFunctionBegin; 822 if (zz != xx) VecCopy(zz,xx); 823 824 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 825 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 826 tmp = a->solve_work; 827 828 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 829 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 830 831 /* copy the b into temp work space according to permutation */ 832 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 833 834 /* forward solve the U^T */ 835 for (i=0; i<n; i++) { 836 v = aa + diag[i] ; 837 vi = aj + diag[i] + 1; 838 nz = ai[i+1] - diag[i] - 1; 839 tmp[i] *= *v++; 840 while (nz--) { 841 tmp[*vi++ ] -= (*v++)*tmp[i]; 842 } 843 } 844 845 /* backward solve the L^T */ 846 for (i=n-1; i>=0; i--){ 847 v = aa + diag[i] - 1 ; 848 vi = aj + diag[i] - 1 ; 849 nz = diag[i] - ai[i]; 850 while (nz--) { 851 tmp[*vi-- ] -= (*v--)*tmp[i]; 852 } 853 } 854 855 /* copy tmp into x according to permutation */ 856 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 857 858 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 859 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 860 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 861 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 862 863 PetscLogFlops(2*a->nz); 864 PetscFunctionReturn(0); 865 } 866 /* ----------------------------------------------------------------*/ 867 EXTERN int MatMissingDiagonal_SeqAIJ(Mat); 868 869 #undef __FUNCT__ 870 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 871 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 872 { 873 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 874 IS isicol; 875 int *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j; 876 int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 877 int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0; 878 int incrlev,nnz,i,levels,diagonal_fill; 879 PetscTruth col_identity,row_identity; 880 PetscReal f; 881 882 PetscFunctionBegin; 883 f = info->fill; 884 levels = (int)info->levels; 885 diagonal_fill = (int)info->diagonal_fill; 886 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 887 888 /* special case that simply copies fill pattern */ 889 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 890 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 891 if (!levels && row_identity && col_identity) { 892 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 893 (*fact)->factor = FACTOR_LU; 894 b = (Mat_SeqAIJ*)(*fact)->data; 895 if (!b->diag) { 896 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 897 } 898 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 899 b->row = isrow; 900 b->col = iscol; 901 b->icol = isicol; 902 b->lu_damping = info->damping; 903 b->lu_zeropivot = info->zeropivot; 904 b->lu_shift = info->shift; 905 b->lu_shift_fraction= info->shift_fraction; 906 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 907 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 908 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 909 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 910 PetscFunctionReturn(0); 911 } 912 913 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 914 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 915 916 /* get new row pointers */ 917 ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 918 ainew[0] = 0; 919 /* don't know how many column pointers are needed so estimate */ 920 jmax = (int)(f*(ai[n]+1)); 921 ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 922 /* ajfill is level of fill for each fill entry */ 923 ierr = PetscMalloc((jmax)*sizeof(int),&ajfill);CHKERRQ(ierr); 924 /* fill is a linked list of nonzeros in active row */ 925 ierr = PetscMalloc((n+1)*sizeof(int),&fill);CHKERRQ(ierr); 926 /* im is level for each filled value */ 927 ierr = PetscMalloc((n+1)*sizeof(int),&im);CHKERRQ(ierr); 928 /* dloc is location of diagonal in factor */ 929 ierr = PetscMalloc((n+1)*sizeof(int),&dloc);CHKERRQ(ierr); 930 dloc[0] = 0; 931 for (prow=0; prow<n; prow++) { 932 933 /* copy current row into linked list */ 934 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 935 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 936 xi = aj + ai[r[prow]] ; 937 fill[n] = n; 938 fill[prow] = -1; /* marker to indicate if diagonal exists */ 939 while (nz--) { 940 fm = n; 941 idx = ic[*xi++ ]; 942 do { 943 m = fm; 944 fm = fill[m]; 945 } while (fm < idx); 946 fill[m] = idx; 947 fill[idx] = fm; 948 im[idx] = 0; 949 } 950 951 /* make sure diagonal entry is included */ 952 if (diagonal_fill && fill[prow] == -1) { 953 fm = n; 954 while (fill[fm] < prow) fm = fill[fm]; 955 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 956 fill[fm] = prow; 957 im[prow] = 0; 958 nzf++; 959 dcount++; 960 } 961 962 nzi = 0; 963 row = fill[n]; 964 while (row < prow) { 965 incrlev = im[row] + 1; 966 nz = dloc[row]; 967 xi = ajnew + ainew[row] + nz + 1; 968 flev = ajfill + ainew[row] + nz + 1; 969 nnz = ainew[row+1] - ainew[row] - nz - 1; 970 fm = row; 971 while (nnz-- > 0) { 972 idx = *xi++ ; 973 if (*flev + incrlev > levels) { 974 flev++; 975 continue; 976 } 977 do { 978 m = fm; 979 fm = fill[m]; 980 } while (fm < idx); 981 if (fm != idx) { 982 im[idx] = *flev + incrlev; 983 fill[m] = idx; 984 fill[idx] = fm; 985 fm = idx; 986 nzf++; 987 } else { 988 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 989 } 990 flev++; 991 } 992 row = fill[row]; 993 nzi++; 994 } 995 /* copy new filled row into permanent storage */ 996 ainew[prow+1] = ainew[prow] + nzf; 997 if (ainew[prow+1] > jmax) { 998 999 /* estimate how much additional space we will need */ 1000 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1001 /* just double the memory each time */ 1002 /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1003 int maxadd = jmax; 1004 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1005 jmax += maxadd; 1006 1007 /* allocate a longer ajnew and ajfill */ 1008 ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 1009 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(int));CHKERRQ(ierr); 1010 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1011 ajnew = xi; 1012 ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 1013 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(int));CHKERRQ(ierr); 1014 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1015 ajfill = xi; 1016 realloc++; /* count how many times we realloc */ 1017 } 1018 xi = ajnew + ainew[prow] ; 1019 flev = ajfill + ainew[prow] ; 1020 dloc[prow] = nzi; 1021 fm = fill[n]; 1022 while (nzf--) { 1023 *xi++ = fm ; 1024 *flev++ = im[fm]; 1025 fm = fill[fm]; 1026 } 1027 /* make sure row has diagonal entry */ 1028 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1029 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrix\n\ 1030 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1031 } 1032 } 1033 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1034 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1035 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1036 ierr = PetscFree(fill);CHKERRQ(ierr); 1037 ierr = PetscFree(im);CHKERRQ(ierr); 1038 1039 { 1040 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1041 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1042 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1043 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1044 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1045 if (diagonal_fill) { 1046 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount); 1047 } 1048 } 1049 1050 /* put together the new matrix */ 1051 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1052 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1053 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1054 PetscLogObjectParent(*fact,isicol); 1055 b = (Mat_SeqAIJ*)(*fact)->data; 1056 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1057 b->singlemalloc = PETSC_FALSE; 1058 len = (ainew[n] )*sizeof(PetscScalar); 1059 /* the next line frees the default space generated by the Create() */ 1060 ierr = PetscFree(b->a);CHKERRQ(ierr); 1061 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1062 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1063 b->j = ajnew; 1064 b->i = ainew; 1065 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1066 b->diag = dloc; 1067 b->ilen = 0; 1068 b->imax = 0; 1069 b->row = isrow; 1070 b->col = iscol; 1071 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1072 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1073 b->icol = isicol; 1074 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1075 /* In b structure: Free imax, ilen, old a, old j. 1076 Allocate dloc, solve_work, new a, new j */ 1077 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(int)+sizeof(PetscScalar))); 1078 b->maxnz = b->nz = ainew[n] ; 1079 b->lu_damping = info->damping; 1080 b->lu_shift = info->shift; 1081 b->lu_shift_fraction = info->shift_fraction; 1082 b->lu_zeropivot = info->zeropivot; 1083 (*fact)->factor = FACTOR_LU; 1084 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1085 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1086 1087 (*fact)->info.factor_mallocs = realloc; 1088 (*fact)->info.fill_ratio_given = f; 1089 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1090 PetscFunctionReturn(0); 1091 } 1092 1093 #include "src/mat/impls/sbaij/seq/sbaij.h" 1094 #undef __FUNCT__ 1095 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1096 int MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact) 1097 { 1098 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1099 int ierr; 1100 1101 PetscFunctionBegin; 1102 if (!a->sbaijMat){ 1103 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1104 } 1105 1106 ierr = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(a->sbaijMat,fact);CHKERRQ(ierr); 1107 ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 1108 a->sbaijMat = PETSC_NULL; 1109 1110 PetscFunctionReturn(0); 1111 } 1112 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1115 int MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1116 { 1117 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1118 int ierr; 1119 PetscTruth perm_identity; 1120 1121 PetscFunctionBegin; 1122 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1123 if (!perm_identity){ 1124 SETERRQ(1,"Non-identity permutation is not supported yet"); 1125 } 1126 if (!a->sbaijMat){ 1127 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1128 } 1129 1130 ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1131 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1132 1133 PetscFunctionReturn(0); 1134 } 1135 1136 #undef __FUNCT__ 1137 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1138 int MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1139 { 1140 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1141 int ierr; 1142 PetscTruth perm_identity; 1143 1144 PetscFunctionBegin; 1145 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1146 if (!perm_identity){ 1147 SETERRQ(1,"Non-identity permutation is not supported yet"); 1148 } 1149 if (!a->sbaijMat){ 1150 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1151 } 1152 1153 ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1154 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1155 1156 PetscFunctionReturn(0); 1157 } 1158