1 /*$Id: aijfact.c,v 1.167 2001/09/11 16:32:26 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 #include "src/inline/spops.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 10 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 11 { 12 PetscFunctionBegin; 13 14 SETERRQ(PETSC_ERR_SUP,"Code not written"); 15 #if !defined(PETSC_USE_DEBUG) 16 PetscFunctionReturn(0); 17 #endif 18 } 19 20 21 EXTERN int MatMarkDiagonal_SeqAIJ(Mat); 22 EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth); 23 24 EXTERN int SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*); 25 EXTERN int SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*); 26 EXTERN int SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*); 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ishift = 0, for indices start at 1 50 ishift = 1, for indices starting at 0 51 ------------------------------------------------------------ 52 */ 53 int MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 54 { 55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 56 PetscFunctionBegin; 57 SETERRQ(1,"This distribution does not include GNU Copyright code\n\ 58 You can obtain the drop tolerance routines by installing PETSc from\n\ 59 www.mcs.anl.gov/petsc\n"); 60 #else 61 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 62 IS iscolf,isicol,isirow; 63 PetscTruth reorder; 64 int *c,*r,*ic,ierr,i,n = A->m; 65 int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 66 int *ordcol,*iwk,*iperm,*jw; 67 int ishift = !a->indexshift; 68 int jmax,lfill,job,*o_i,*o_j; 69 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 70 PetscReal permtol,af; 71 72 PetscFunctionBegin; 73 74 if (info->dt == PETSC_DEFAULT) info->dt = .005; 75 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax); 76 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 77 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 78 lfill = (int)(info->dtcount/2.0); 79 jmax = (int)(info->fill*a->nz); 80 permtol = info->dtcol; 81 82 83 /* ------------------------------------------------------------ 84 If reorder=.TRUE., then the original matrix has to be 85 reordered to reflect the user selected ordering scheme, and 86 then de-reordered so it is in it's original format. 87 Because Saad's dperm() is NOT in place, we have to copy 88 the original matrix and allocate more storage. . . 89 ------------------------------------------------------------ 90 */ 91 92 /* set reorder to true if either isrow or iscol is not identity */ 93 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 94 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 95 reorder = PetscNot(reorder); 96 97 98 /* storage for ilu factor */ 99 ierr = PetscMalloc((n+1)*sizeof(int),&new_i);CHKERRQ(ierr); 100 ierr = PetscMalloc(jmax*sizeof(int),&new_j);CHKERRQ(ierr); 101 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 102 ierr = PetscMalloc(n*sizeof(int),&ordcol);CHKERRQ(ierr); 103 104 /* ------------------------------------------------------------ 105 Make sure that everything is Fortran formatted (1-Based) 106 ------------------------------------------------------------ 107 */ 108 if (ishift) { 109 for (i=old_i[0];i<old_i[n];i++) { 110 old_j[i]++; 111 } 112 for(i=0;i<n+1;i++) { 113 old_i[i]++; 114 }; 115 }; 116 117 if (reorder) { 118 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 119 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 120 for(i=0;i<n;i++) { 121 r[i] = r[i]+1; 122 c[i] = c[i]+1; 123 } 124 ierr = PetscMalloc((n+1)*sizeof(int),&old_i2);CHKERRQ(ierr); 125 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);CHKERRQ(ierr); 126 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 127 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 128 for (i=0;i<n;i++) { 129 r[i] = r[i]-1; 130 c[i] = c[i]-1; 131 } 132 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 133 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 134 o_a = old_a2; 135 o_j = old_j2; 136 o_i = old_i2; 137 } else { 138 o_a = old_a; 139 o_j = old_j; 140 o_i = old_i; 141 } 142 143 /* ------------------------------------------------------------ 144 Call Saad's ilutp() routine to generate the factorization 145 ------------------------------------------------------------ 146 */ 147 148 ierr = PetscMalloc(2*n*sizeof(int),&iperm);CHKERRQ(ierr); 149 ierr = PetscMalloc(2*n*sizeof(int),&jw);CHKERRQ(ierr); 150 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 151 152 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 153 if (ierr) { 154 switch (ierr) { 155 case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 156 case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 157 case -5: SETERRQ(1,"ilutp(), zero row encountered"); 158 case -1: SETERRQ(1,"ilutp(), input matrix may be wrong"); 159 case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax); 160 default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr); 161 } 162 } 163 164 ierr = PetscFree(w);CHKERRQ(ierr); 165 ierr = PetscFree(jw);CHKERRQ(ierr); 166 167 /* ------------------------------------------------------------ 168 Saad's routine gives the result in Modified Sparse Row (msr) 169 Convert to Compressed Sparse Row format (csr) 170 ------------------------------------------------------------ 171 */ 172 173 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 174 ierr = PetscMalloc((n+1)*sizeof(int),&iwk);CHKERRQ(ierr); 175 176 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 177 178 ierr = PetscFree(iwk);CHKERRQ(ierr); 179 ierr = PetscFree(wk);CHKERRQ(ierr); 180 181 if (reorder) { 182 ierr = PetscFree(old_a2);CHKERRQ(ierr); 183 ierr = PetscFree(old_j2);CHKERRQ(ierr); 184 ierr = PetscFree(old_i2);CHKERRQ(ierr); 185 } else { 186 /* fix permutation of old_j that the factorization introduced */ 187 for (i=old_i[0]; i<old_i[n]; i++) { 188 old_j[i-1] = iperm[old_j[i-1]-1]; 189 } 190 } 191 192 /* get rid of the shift to indices starting at 1 */ 193 if (ishift) { 194 for (i=0; i<n+1; i++) { 195 old_i[i]--; 196 } 197 for (i=old_i[0];i<old_i[n];i++) { 198 old_j[i]--; 199 } 200 } 201 202 /* Make the factored matrix 0-based */ 203 if (ishift) { 204 for (i=0; i<n+1; i++) { 205 new_i[i]--; 206 } 207 for (i=new_i[0];i<new_i[n];i++) { 208 new_j[i]--; 209 } 210 } 211 212 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 213 /*-- permute the right-hand-side and solution vectors --*/ 214 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 215 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 216 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 217 for(i=0; i<n; i++) { 218 ordcol[i] = ic[iperm[i]-1]; 219 }; 220 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 221 ierr = ISDestroy(isicol);CHKERRQ(ierr); 222 223 ierr = PetscFree(iperm);CHKERRQ(ierr); 224 225 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 226 ierr = PetscFree(ordcol);CHKERRQ(ierr); 227 228 /*----- put together the new matrix -----*/ 229 230 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 231 (*fact)->factor = FACTOR_LU; 232 (*fact)->assembled = PETSC_TRUE; 233 234 b = (Mat_SeqAIJ*)(*fact)->data; 235 ierr = PetscFree(b->imax);CHKERRQ(ierr); 236 b->sorted = PETSC_FALSE; 237 b->singlemalloc = PETSC_FALSE; 238 /* the next line frees the default space generated by the MatCreate() */ 239 ierr = PetscFree(b->a);CHKERRQ(ierr); 240 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 241 b->a = new_a; 242 b->j = new_j; 243 b->i = new_i; 244 b->ilen = 0; 245 b->imax = 0; 246 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 247 b->row = isirow; 248 b->col = iscolf; 249 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 250 b->maxnz = b->nz = new_i[n]; 251 b->indexshift = a->indexshift; 252 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 253 (*fact)->info.factor_mallocs = 0; 254 255 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 256 257 /* check out for identical nodes. If found, use inode functions */ 258 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 259 260 af = ((double)b->nz)/((double)a->nz) + .001; 261 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 262 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 263 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 264 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 265 266 PetscFunctionReturn(0); 267 #endif 268 } 269 270 /* 271 Factorization code for AIJ format. 272 */ 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 275 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 276 { 277 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 278 IS isicol; 279 int *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j; 280 int *ainew,*ajnew,jmax,*fill,*ajtmp,nz,shift = a->indexshift; 281 int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 282 PetscReal f; 283 284 PetscFunctionBegin; 285 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 286 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 287 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 288 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 289 290 /* get new row pointers */ 291 ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 292 ainew[0] = -shift; 293 /* don't know how many column pointers are needed so estimate */ 294 f = info->fill; 295 jmax = (int)(f*ai[n]+(!shift)); 296 ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 297 /* fill is a linked list of nonzeros in active row */ 298 ierr = PetscMalloc((2*n+1)*sizeof(int),&fill);CHKERRQ(ierr); 299 im = fill + n + 1; 300 /* idnew is location of diagonal in factor */ 301 ierr = PetscMalloc((n+1)*sizeof(int),&idnew);CHKERRQ(ierr); 302 idnew[0] = -shift; 303 304 for (i=0; i<n; i++) { 305 /* first copy previous fill into linked list */ 306 nnz = nz = ai[r[i]+1] - ai[r[i]]; 307 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 308 ajtmp = aj + ai[r[i]] + shift; 309 fill[n] = n; 310 while (nz--) { 311 fm = n; 312 idx = ic[*ajtmp++ + shift]; 313 do { 314 m = fm; 315 fm = fill[m]; 316 } while (fm < idx); 317 fill[m] = idx; 318 fill[idx] = fm; 319 } 320 row = fill[n]; 321 while (row < i) { 322 ajtmp = ajnew + idnew[row] + (!shift); 323 nzbd = 1 + idnew[row] - ainew[row]; 324 nz = im[row] - nzbd; 325 fm = row; 326 while (nz-- > 0) { 327 idx = *ajtmp++ + shift; 328 nzbd++; 329 if (idx == i) im[row] = nzbd; 330 do { 331 m = fm; 332 fm = fill[m]; 333 } while (fm < idx); 334 if (fm != idx) { 335 fill[m] = idx; 336 fill[idx] = fm; 337 fm = idx; 338 nnz++; 339 } 340 } 341 row = fill[row]; 342 } 343 /* copy new filled row into permanent storage */ 344 ainew[i+1] = ainew[i] + nnz; 345 if (ainew[i+1] > jmax) { 346 347 /* estimate how much additional space we will need */ 348 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 349 /* just double the memory each time */ 350 int maxadd = jmax; 351 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 352 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 353 jmax += maxadd; 354 355 /* allocate a longer ajnew */ 356 ierr = PetscMalloc(jmax*sizeof(int),&ajtmp);CHKERRQ(ierr); 357 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 358 ierr = PetscFree(ajnew);CHKERRQ(ierr); 359 ajnew = ajtmp; 360 realloc++; /* count how many times we realloc */ 361 } 362 ajtmp = ajnew + ainew[i] + shift; 363 fm = fill[n]; 364 nzi = 0; 365 im[i] = nnz; 366 while (nnz--) { 367 if (fm < i) nzi++; 368 *ajtmp++ = fm - shift; 369 fm = fill[fm]; 370 } 371 idnew[i] = ainew[i] + nzi; 372 } 373 if (ai[n] != 0) { 374 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 375 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 376 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 377 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 378 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 379 } else { 380 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 381 } 382 383 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 384 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 385 386 ierr = PetscFree(fill);CHKERRQ(ierr); 387 388 /* put together the new matrix */ 389 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 390 PetscLogObjectParent(*B,isicol); 391 b = (Mat_SeqAIJ*)(*B)->data; 392 ierr = PetscFree(b->imax);CHKERRQ(ierr); 393 b->singlemalloc = PETSC_FALSE; 394 /* the next line frees the default space generated by the Create() */ 395 ierr = PetscFree(b->a);CHKERRQ(ierr); 396 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 397 ierr = PetscMalloc((ainew[n]+shift+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 398 b->j = ajnew; 399 b->i = ainew; 400 b->diag = idnew; 401 b->ilen = 0; 402 b->imax = 0; 403 b->row = isrow; 404 b->col = iscol; 405 b->lu_damping = info->damping; 406 b->lu_zeropivot = info->zeropivot; 407 b->lu_shift = info->lu_shift; 408 b->lu_shift_fraction = info->lu_shift_fraction; 409 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 410 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 411 b->icol = isicol; 412 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 413 /* In b structure: Free imax, ilen, old a, old j. 414 Allocate idnew, solve_work, new a, new j */ 415 PetscLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(PetscScalar))); 416 b->maxnz = b->nz = ainew[n] + shift; 417 418 (*B)->factor = FACTOR_LU; 419 (*B)->info.factor_mallocs = realloc; 420 (*B)->info.fill_ratio_given = f; 421 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 422 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 423 424 if (ai[n] != 0) { 425 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 426 } else { 427 (*B)->info.fill_ratio_needed = 0.0; 428 } 429 PetscFunctionReturn(0); 430 } 431 /* ----------------------------------------------------------- */ 432 EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth); 433 434 #undef __FUNCT__ 435 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 436 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 437 { 438 Mat C = *B; 439 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 440 IS isrow = b->row,isicol = b->icol; 441 int *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j; 442 int *ajtmpold,*ajtmp,nz,row,*ics,shift = a->indexshift; 443 int *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0; 444 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 445 PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d; 446 PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.; 447 PetscTruth damp,lushift; 448 449 PetscFunctionBegin; 450 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 451 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 452 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 453 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 454 rtmps = rtmp + shift; ics = ic + shift; 455 456 if (!a->diag) { 457 ierr = MatMarkDiagonal_SeqAIJ(A); CHKERRQ(ierr); 458 } 459 460 if (b->lu_shift) { /* set max shift */ 461 int *aai = a->i,*ddiag = a->diag; 462 shift_top = 0; 463 for (i=0; i<n; i++) { 464 d = PetscAbsScalar((a->a)[ddiag[i]+shift]); 465 /* calculate amt of shift needed for this row */ 466 if (d<0) row_shift = 0; else row_shift = -2*d; 467 v = a->a+aai[i]+shift; 468 for (j=0; j<aai[i+1]-aai[i]; j++) 469 row_shift += PetscAbsScalar(v[j]); 470 if (row_shift>shift_top) shift_top = row_shift; 471 } 472 } 473 474 shift_fraction = 0; shift_amount = 0; 475 do { 476 damp = PETSC_FALSE; 477 lushift = PETSC_FALSE; 478 for (i=0; i<n; i++) { 479 nz = ai[i+1] - ai[i]; 480 ajtmp = aj + ai[i] + shift; 481 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 482 483 /* load in initial (unfactored row) */ 484 nz = a->i[r[i]+1] - a->i[r[i]]; 485 ajtmpold = a->j + a->i[r[i]] + shift; 486 v = a->a + a->i[r[i]] + shift; 487 for (j=0; j<nz; j++) { 488 rtmp[ics[ajtmpold[j]]] = v[j]; 489 } 490 rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */ 491 492 row = *ajtmp++ + shift; 493 while (row < i) { 494 pc = rtmp + row; 495 if (*pc != 0.0) { 496 pv = b->a + diag_offset[row] + shift; 497 pj = b->j + diag_offset[row] + (!shift); 498 multiplier = *pc / *pv++; 499 *pc = multiplier; 500 nz = ai[row+1] - diag_offset[row] - 1; 501 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 502 PetscLogFlops(2*nz); 503 } 504 row = *ajtmp++ + shift; 505 } 506 /* finished row so stick it into b->a */ 507 pv = b->a + ai[i] + shift; 508 pj = b->j + ai[i] + shift; 509 nz = ai[i+1] - ai[i]; 510 diag = diag_offset[i] - ai[i]; 511 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 512 rs = 0.0; 513 for (j=0; j<nz; j++) { 514 pv[j] = rtmps[pj[j]]; 515 if (j != diag) rs += PetscAbsScalar(pv[j]); 516 } 517 #define MAX_NSHIFT 5 518 if (PetscRealPart(pv[diag]) < zeropivot*rs && b->lu_shift) { 519 if (nshift>MAX_NSHIFT) { 520 SETERRQ(1,"Unable to determine shift to enforce positive definite preconditioner"); 521 } else if (nshift==MAX_NSHIFT) { 522 shift_fraction = shift_hi; 523 } else { 524 shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 525 } 526 shift_amount = shift_fraction * shift_top; 527 lushift = PETSC_TRUE; 528 nshift++; 529 break; 530 } 531 if (PetscAbsScalar(pv[diag]) < zeropivot*rs) { 532 if (damping) { 533 if (ndamp) damping *= 2.0; 534 damp = PETSC_TRUE; 535 ndamp++; 536 break; 537 } else { 538 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 539 } 540 } 541 } 542 if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) { 543 /* 544 * if not already shifting up & shifting & started shifting & can refine, 545 * then try lower shift 546 */ 547 shift_hi = shift_fraction; 548 shift_fraction = (shift_hi+shift_lo)/2.; 549 shift_amount = shift_fraction * shift_top; 550 lushift = PETSC_TRUE; 551 nshift++; 552 } 553 } while (damp || lushift); 554 555 /* invert diagonal entries for simplier triangular solves */ 556 for (i=0; i<n; i++) { 557 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 558 } 559 560 ierr = PetscFree(rtmp);CHKERRQ(ierr); 561 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 562 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 563 C->factor = FACTOR_LU; 564 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 565 C->assembled = PETSC_TRUE; 566 PetscLogFlops(C->n); 567 if (ndamp) { 568 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %g\n",ndamp,damping); 569 } 570 if (nshift) { 571 b->lu_shift_fraction = shift_fraction; 572 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction\n",shift_fraction); 573 } 574 PetscFunctionReturn(0); 575 } 576 /* ----------------------------------------------------------- */ 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatLUFactor_SeqAIJ" 579 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 580 { 581 int ierr; 582 Mat C; 583 584 PetscFunctionBegin; 585 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 586 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 587 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 588 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 589 PetscFunctionReturn(0); 590 } 591 /* ----------------------------------------------------------- */ 592 #undef __FUNCT__ 593 #define __FUNCT__ "MatSolve_SeqAIJ" 594 int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 595 { 596 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 597 IS iscol = a->col,isrow = a->row; 598 int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 599 int nz,shift = a->indexshift,*rout,*cout; 600 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 601 602 PetscFunctionBegin; 603 if (!n) PetscFunctionReturn(0); 604 605 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 606 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 607 tmp = a->solve_work; 608 609 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 610 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 611 612 /* forward solve the lower triangular */ 613 tmp[0] = b[*r++]; 614 tmps = tmp + shift; 615 for (i=1; i<n; i++) { 616 v = aa + ai[i] + shift; 617 vi = aj + ai[i] + shift; 618 nz = a->diag[i] - ai[i]; 619 sum = b[*r++]; 620 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 621 tmp[i] = sum; 622 } 623 624 /* backward solve the upper triangular */ 625 for (i=n-1; i>=0; i--){ 626 v = aa + a->diag[i] + (!shift); 627 vi = aj + a->diag[i] + (!shift); 628 nz = ai[i+1] - a->diag[i] - 1; 629 sum = tmp[i]; 630 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 631 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 632 } 633 634 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 635 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 636 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 637 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 638 PetscLogFlops(2*a->nz - A->n); 639 PetscFunctionReturn(0); 640 } 641 642 /* ----------------------------------------------------------- */ 643 #undef __FUNCT__ 644 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 645 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 646 { 647 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 648 int n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr; 649 PetscScalar *x,*b,*aa = a->a; 650 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 651 int adiag_i,i,*vi,nz,ai_i; 652 PetscScalar *v,sum; 653 #endif 654 655 PetscFunctionBegin; 656 if (!n) PetscFunctionReturn(0); 657 if (a->indexshift) { 658 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 659 PetscFunctionReturn(0); 660 } 661 662 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 663 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 664 665 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 666 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 667 #else 668 /* forward solve the lower triangular */ 669 x[0] = b[0]; 670 for (i=1; i<n; i++) { 671 ai_i = ai[i]; 672 v = aa + ai_i; 673 vi = aj + ai_i; 674 nz = adiag[i] - ai_i; 675 sum = b[i]; 676 while (nz--) sum -= *v++ * x[*vi++]; 677 x[i] = sum; 678 } 679 680 /* backward solve the upper triangular */ 681 for (i=n-1; i>=0; i--){ 682 adiag_i = adiag[i]; 683 v = aa + adiag_i + 1; 684 vi = aj + adiag_i + 1; 685 nz = ai[i+1] - adiag_i - 1; 686 sum = x[i]; 687 while (nz--) sum -= *v++ * x[*vi++]; 688 x[i] = sum*aa[adiag_i]; 689 } 690 #endif 691 PetscLogFlops(2*a->nz - A->n); 692 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 693 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 699 int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 700 { 701 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 702 IS iscol = a->col,isrow = a->row; 703 int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 704 int nz,shift = a->indexshift,*rout,*cout; 705 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 706 707 PetscFunctionBegin; 708 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 709 710 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 711 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 712 tmp = a->solve_work; 713 714 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 715 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 716 717 /* forward solve the lower triangular */ 718 tmp[0] = b[*r++]; 719 for (i=1; i<n; i++) { 720 v = aa + ai[i] + shift; 721 vi = aj + ai[i] + shift; 722 nz = a->diag[i] - ai[i]; 723 sum = b[*r++]; 724 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 725 tmp[i] = sum; 726 } 727 728 /* backward solve the upper triangular */ 729 for (i=n-1; i>=0; i--){ 730 v = aa + a->diag[i] + (!shift); 731 vi = aj + a->diag[i] + (!shift); 732 nz = ai[i+1] - a->diag[i] - 1; 733 sum = tmp[i]; 734 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 735 tmp[i] = sum*aa[a->diag[i]+shift]; 736 x[*c--] += tmp[i]; 737 } 738 739 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 740 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 741 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 742 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 743 PetscLogFlops(2*a->nz); 744 745 PetscFunctionReturn(0); 746 } 747 /* -------------------------------------------------------------------*/ 748 #undef __FUNCT__ 749 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 750 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 751 { 752 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 753 IS iscol = a->col,isrow = a->row; 754 int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 755 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 756 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 757 758 PetscFunctionBegin; 759 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 760 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 761 tmp = a->solve_work; 762 763 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 764 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 765 766 /* copy the b into temp work space according to permutation */ 767 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 768 769 /* forward solve the U^T */ 770 for (i=0; i<n; i++) { 771 v = aa + diag[i] + shift; 772 vi = aj + diag[i] + (!shift); 773 nz = ai[i+1] - diag[i] - 1; 774 s1 = tmp[i]; 775 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 776 while (nz--) { 777 tmp[*vi++ + shift] -= (*v++)*s1; 778 } 779 tmp[i] = s1; 780 } 781 782 /* backward solve the L^T */ 783 for (i=n-1; i>=0; i--){ 784 v = aa + diag[i] - 1 + shift; 785 vi = aj + diag[i] - 1 + shift; 786 nz = diag[i] - ai[i]; 787 s1 = tmp[i]; 788 while (nz--) { 789 tmp[*vi-- + shift] -= (*v--)*s1; 790 } 791 } 792 793 /* copy tmp into x according to permutation */ 794 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 795 796 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 797 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 798 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 799 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 800 801 PetscLogFlops(2*a->nz-A->n); 802 PetscFunctionReturn(0); 803 } 804 805 #undef __FUNCT__ 806 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 807 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 808 { 809 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 810 IS iscol = a->col,isrow = a->row; 811 int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 812 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 813 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 814 815 PetscFunctionBegin; 816 if (zz != xx) VecCopy(zz,xx); 817 818 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 819 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 820 tmp = a->solve_work; 821 822 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 823 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 824 825 /* copy the b into temp work space according to permutation */ 826 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 827 828 /* forward solve the U^T */ 829 for (i=0; i<n; i++) { 830 v = aa + diag[i] + shift; 831 vi = aj + diag[i] + (!shift); 832 nz = ai[i+1] - diag[i] - 1; 833 tmp[i] *= *v++; 834 while (nz--) { 835 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 836 } 837 } 838 839 /* backward solve the L^T */ 840 for (i=n-1; i>=0; i--){ 841 v = aa + diag[i] - 1 + shift; 842 vi = aj + diag[i] - 1 + shift; 843 nz = diag[i] - ai[i]; 844 while (nz--) { 845 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 846 } 847 } 848 849 /* copy tmp into x according to permutation */ 850 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 851 852 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 853 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 854 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 855 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 856 857 PetscLogFlops(2*a->nz); 858 PetscFunctionReturn(0); 859 } 860 /* ----------------------------------------------------------------*/ 861 EXTERN int MatMissingDiagonal_SeqAIJ(Mat); 862 863 #undef __FUNCT__ 864 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 865 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 866 { 867 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 868 IS isicol; 869 int *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j; 870 int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 871 int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0; 872 int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 873 PetscTruth col_identity,row_identity; 874 PetscReal f; 875 876 PetscFunctionBegin; 877 f = info->fill; 878 levels = (int)info->levels; 879 diagonal_fill = (int)info->diagonal_fill; 880 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 881 882 /* special case that simply copies fill pattern */ 883 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 884 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 885 if (!levels && row_identity && col_identity) { 886 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 887 (*fact)->factor = FACTOR_LU; 888 b = (Mat_SeqAIJ*)(*fact)->data; 889 if (!b->diag) { 890 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 891 } 892 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 893 b->row = isrow; 894 b->col = iscol; 895 b->icol = isicol; 896 b->lu_damping = info->damping; 897 b->lu_zeropivot = info->zeropivot; 898 b->lu_shift = info->lu_shift; 899 b->lu_shift_fraction= info->lu_shift_fraction; 900 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 901 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 902 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 903 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } 906 907 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 908 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 909 910 /* get new row pointers */ 911 ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 912 ainew[0] = -shift; 913 /* don't know how many column pointers are needed so estimate */ 914 jmax = (int)(f*(ai[n]+!shift)); 915 ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 916 /* ajfill is level of fill for each fill entry */ 917 ierr = PetscMalloc((jmax)*sizeof(int),&ajfill);CHKERRQ(ierr); 918 /* fill is a linked list of nonzeros in active row */ 919 ierr = PetscMalloc((n+1)*sizeof(int),&fill);CHKERRQ(ierr); 920 /* im is level for each filled value */ 921 ierr = PetscMalloc((n+1)*sizeof(int),&im);CHKERRQ(ierr); 922 /* dloc is location of diagonal in factor */ 923 ierr = PetscMalloc((n+1)*sizeof(int),&dloc);CHKERRQ(ierr); 924 dloc[0] = 0; 925 for (prow=0; prow<n; prow++) { 926 927 /* copy current row into linked list */ 928 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 929 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 930 xi = aj + ai[r[prow]] + shift; 931 fill[n] = n; 932 fill[prow] = -1; /* marker to indicate if diagonal exists */ 933 while (nz--) { 934 fm = n; 935 idx = ic[*xi++ + shift]; 936 do { 937 m = fm; 938 fm = fill[m]; 939 } while (fm < idx); 940 fill[m] = idx; 941 fill[idx] = fm; 942 im[idx] = 0; 943 } 944 945 /* make sure diagonal entry is included */ 946 if (diagonal_fill && fill[prow] == -1) { 947 fm = n; 948 while (fill[fm] < prow) fm = fill[fm]; 949 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 950 fill[fm] = prow; 951 im[prow] = 0; 952 nzf++; 953 dcount++; 954 } 955 956 nzi = 0; 957 row = fill[n]; 958 while (row < prow) { 959 incrlev = im[row] + 1; 960 nz = dloc[row]; 961 xi = ajnew + ainew[row] + shift + nz + 1; 962 flev = ajfill + ainew[row] + shift + nz + 1; 963 nnz = ainew[row+1] - ainew[row] - nz - 1; 964 fm = row; 965 while (nnz-- > 0) { 966 idx = *xi++ + shift; 967 if (*flev + incrlev > levels) { 968 flev++; 969 continue; 970 } 971 do { 972 m = fm; 973 fm = fill[m]; 974 } while (fm < idx); 975 if (fm != idx) { 976 im[idx] = *flev + incrlev; 977 fill[m] = idx; 978 fill[idx] = fm; 979 fm = idx; 980 nzf++; 981 } else { 982 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 983 } 984 flev++; 985 } 986 row = fill[row]; 987 nzi++; 988 } 989 /* copy new filled row into permanent storage */ 990 ainew[prow+1] = ainew[prow] + nzf; 991 if (ainew[prow+1] > jmax-shift) { 992 993 /* estimate how much additional space we will need */ 994 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 995 /* just double the memory each time */ 996 /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 997 int maxadd = jmax; 998 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 999 jmax += maxadd; 1000 1001 /* allocate a longer ajnew and ajfill */ 1002 ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 1003 ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1004 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1005 ajnew = xi; 1006 ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 1007 ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1008 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1009 ajfill = xi; 1010 realloc++; /* count how many times we realloc */ 1011 } 1012 xi = ajnew + ainew[prow] + shift; 1013 flev = ajfill + ainew[prow] + shift; 1014 dloc[prow] = nzi; 1015 fm = fill[n]; 1016 while (nzf--) { 1017 *xi++ = fm - shift; 1018 *flev++ = im[fm]; 1019 fm = fill[fm]; 1020 } 1021 /* make sure row has diagonal entry */ 1022 if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 1023 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrix\n\ 1024 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1025 } 1026 } 1027 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1028 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1029 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1030 ierr = PetscFree(fill);CHKERRQ(ierr); 1031 ierr = PetscFree(im);CHKERRQ(ierr); 1032 1033 { 1034 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1035 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1036 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1037 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1038 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1039 if (diagonal_fill) { 1040 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount); 1041 } 1042 } 1043 1044 /* put together the new matrix */ 1045 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1046 PetscLogObjectParent(*fact,isicol); 1047 b = (Mat_SeqAIJ*)(*fact)->data; 1048 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1049 b->singlemalloc = PETSC_FALSE; 1050 len = (ainew[n] + shift)*sizeof(PetscScalar); 1051 /* the next line frees the default space generated by the Create() */ 1052 ierr = PetscFree(b->a);CHKERRQ(ierr); 1053 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1054 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1055 b->j = ajnew; 1056 b->i = ainew; 1057 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1058 b->diag = dloc; 1059 b->ilen = 0; 1060 b->imax = 0; 1061 b->row = isrow; 1062 b->col = iscol; 1063 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1064 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1065 b->icol = isicol; 1066 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1067 /* In b structure: Free imax, ilen, old a, old j. 1068 Allocate dloc, solve_work, new a, new j */ 1069 PetscLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(PetscScalar))); 1070 b->maxnz = b->nz = ainew[n] + shift; 1071 b->lu_damping = info->damping; 1072 b->lu_shift = info->lu_shift; 1073 b->lu_shift_fraction = info->lu_shift_fraction; 1074 b->lu_zeropivot = info->zeropivot; 1075 (*fact)->factor = FACTOR_LU; 1076 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1077 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1078 1079 (*fact)->info.factor_mallocs = realloc; 1080 (*fact)->info.fill_ratio_given = f; 1081 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1082 PetscFunctionReturn(0); 1083 } 1084 1085 #include "src/mat/impls/sbaij/seq/sbaij.h" 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1088 int MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact) 1089 { 1090 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1091 int ierr; 1092 1093 PetscFunctionBegin; 1094 if (!a->sbaijMat){ 1095 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1096 } 1097 1098 ierr = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(a->sbaijMat,fact);CHKERRQ(ierr); 1099 ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 1100 a->sbaijMat = PETSC_NULL; 1101 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNCT__ 1106 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1107 int MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1108 { 1109 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1110 int ierr; 1111 PetscTruth perm_identity; 1112 1113 PetscFunctionBegin; 1114 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1115 if (!perm_identity){ 1116 SETERRQ(1,"Non-identity permutation is not supported yet"); 1117 } 1118 if (!a->sbaijMat){ 1119 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1120 } 1121 1122 ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1123 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1124 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1130 int MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1131 { 1132 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1133 int ierr; 1134 PetscTruth perm_identity; 1135 1136 PetscFunctionBegin; 1137 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1138 if (!perm_identity){ 1139 SETERRQ(1,"Non-identity permutation is not supported yet"); 1140 } 1141 if (!a->sbaijMat){ 1142 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1143 } 1144 1145 ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1146 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1147 1148 PetscFunctionReturn(0); 1149 } 1150