1 /*$Id: aijfact.c,v 1.167 2001/09/11 16:32:26 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 #include "src/inline/spops.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 10 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 11 { 12 PetscFunctionBegin; 13 14 SETERRQ(PETSC_ERR_SUP,"Code not written"); 15 #if !defined(PETSC_USE_DEBUG) 16 PetscFunctionReturn(0); 17 #endif 18 } 19 20 21 EXTERN int MatMarkDiagonal_SeqAIJ(Mat); 22 EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth); 23 24 EXTERN int SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*); 25 EXTERN int SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*); 26 EXTERN int SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*); 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ishift = 0, for indices start at 1 50 ishift = 1, for indices starting at 0 51 ------------------------------------------------------------ 52 */ 53 int MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 54 { 55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 56 PetscFunctionBegin; 57 SETERRQ(1,"This distribution does not include GNU Copyright code\n\ 58 You can obtain the drop tolerance routines by installing PETSc from\n\ 59 www.mcs.anl.gov/petsc\n"); 60 #else 61 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 62 IS iscolf,isicol,isirow; 63 PetscTruth reorder; 64 int *c,*r,*ic,ierr,i,n = A->m; 65 int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 66 int *ordcol,*iwk,*iperm,*jw; 67 int ishift = !a->indexshift; 68 int jmax,lfill,job,*o_i,*o_j; 69 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 70 PetscReal permtol,af; 71 72 PetscFunctionBegin; 73 74 if (info->dt == PETSC_DEFAULT) info->dt = .005; 75 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax); 76 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 77 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 78 lfill = (int)(info->dtcount/2.0); 79 jmax = (int)(info->fill*a->nz); 80 permtol = info->dtcol; 81 82 83 /* ------------------------------------------------------------ 84 If reorder=.TRUE., then the original matrix has to be 85 reordered to reflect the user selected ordering scheme, and 86 then de-reordered so it is in it's original format. 87 Because Saad's dperm() is NOT in place, we have to copy 88 the original matrix and allocate more storage. . . 89 ------------------------------------------------------------ 90 */ 91 92 /* set reorder to true if either isrow or iscol is not identity */ 93 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 94 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 95 reorder = PetscNot(reorder); 96 97 98 /* storage for ilu factor */ 99 ierr = PetscMalloc((n+1)*sizeof(int),&new_i);CHKERRQ(ierr); 100 ierr = PetscMalloc(jmax*sizeof(int),&new_j);CHKERRQ(ierr); 101 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 102 ierr = PetscMalloc(n*sizeof(int),&ordcol);CHKERRQ(ierr); 103 104 /* ------------------------------------------------------------ 105 Make sure that everything is Fortran formatted (1-Based) 106 ------------------------------------------------------------ 107 */ 108 if (ishift) { 109 for (i=old_i[0];i<old_i[n];i++) { 110 old_j[i]++; 111 } 112 for(i=0;i<n+1;i++) { 113 old_i[i]++; 114 }; 115 }; 116 117 if (reorder) { 118 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 119 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 120 for(i=0;i<n;i++) { 121 r[i] = r[i]+1; 122 c[i] = c[i]+1; 123 } 124 ierr = PetscMalloc((n+1)*sizeof(int),&old_i2);CHKERRQ(ierr); 125 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);CHKERRQ(ierr); 126 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 127 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 128 for (i=0;i<n;i++) { 129 r[i] = r[i]-1; 130 c[i] = c[i]-1; 131 } 132 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 133 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 134 o_a = old_a2; 135 o_j = old_j2; 136 o_i = old_i2; 137 } else { 138 o_a = old_a; 139 o_j = old_j; 140 o_i = old_i; 141 } 142 143 /* ------------------------------------------------------------ 144 Call Saad's ilutp() routine to generate the factorization 145 ------------------------------------------------------------ 146 */ 147 148 ierr = PetscMalloc(2*n*sizeof(int),&iperm);CHKERRQ(ierr); 149 ierr = PetscMalloc(2*n*sizeof(int),&jw);CHKERRQ(ierr); 150 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 151 152 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 153 if (ierr) { 154 switch (ierr) { 155 case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 156 case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 157 case -5: SETERRQ(1,"ilutp(), zero row encountered"); 158 case -1: SETERRQ(1,"ilutp(), input matrix may be wrong"); 159 case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax); 160 default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr); 161 } 162 } 163 164 ierr = PetscFree(w);CHKERRQ(ierr); 165 ierr = PetscFree(jw);CHKERRQ(ierr); 166 167 /* ------------------------------------------------------------ 168 Saad's routine gives the result in Modified Sparse Row (msr) 169 Convert to Compressed Sparse Row format (csr) 170 ------------------------------------------------------------ 171 */ 172 173 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 174 ierr = PetscMalloc((n+1)*sizeof(int),&iwk);CHKERRQ(ierr); 175 176 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 177 178 ierr = PetscFree(iwk);CHKERRQ(ierr); 179 ierr = PetscFree(wk);CHKERRQ(ierr); 180 181 if (reorder) { 182 ierr = PetscFree(old_a2);CHKERRQ(ierr); 183 ierr = PetscFree(old_j2);CHKERRQ(ierr); 184 ierr = PetscFree(old_i2);CHKERRQ(ierr); 185 } else { 186 /* fix permutation of old_j that the factorization introduced */ 187 for (i=old_i[0]; i<old_i[n]; i++) { 188 old_j[i-1] = iperm[old_j[i-1]-1]; 189 } 190 } 191 192 /* get rid of the shift to indices starting at 1 */ 193 if (ishift) { 194 for (i=0; i<n+1; i++) { 195 old_i[i]--; 196 } 197 for (i=old_i[0];i<old_i[n];i++) { 198 old_j[i]--; 199 } 200 } 201 202 /* Make the factored matrix 0-based */ 203 if (ishift) { 204 for (i=0; i<n+1; i++) { 205 new_i[i]--; 206 } 207 for (i=new_i[0];i<new_i[n];i++) { 208 new_j[i]--; 209 } 210 } 211 212 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 213 /*-- permute the right-hand-side and solution vectors --*/ 214 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 215 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 216 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 217 for(i=0; i<n; i++) { 218 ordcol[i] = ic[iperm[i]-1]; 219 }; 220 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 221 ierr = ISDestroy(isicol);CHKERRQ(ierr); 222 223 ierr = PetscFree(iperm);CHKERRQ(ierr); 224 225 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 226 ierr = PetscFree(ordcol);CHKERRQ(ierr); 227 228 /*----- put together the new matrix -----*/ 229 230 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 231 (*fact)->factor = FACTOR_LU; 232 (*fact)->assembled = PETSC_TRUE; 233 234 b = (Mat_SeqAIJ*)(*fact)->data; 235 ierr = PetscFree(b->imax);CHKERRQ(ierr); 236 b->sorted = PETSC_FALSE; 237 b->singlemalloc = PETSC_FALSE; 238 /* the next line frees the default space generated by the MatCreate() */ 239 ierr = PetscFree(b->a);CHKERRQ(ierr); 240 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 241 b->a = new_a; 242 b->j = new_j; 243 b->i = new_i; 244 b->ilen = 0; 245 b->imax = 0; 246 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 247 b->row = isirow; 248 b->col = iscolf; 249 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 250 b->maxnz = b->nz = new_i[n]; 251 b->indexshift = a->indexshift; 252 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 253 (*fact)->info.factor_mallocs = 0; 254 255 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 256 257 /* check out for identical nodes. If found, use inode functions */ 258 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 259 260 af = ((double)b->nz)/((double)a->nz) + .001; 261 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 262 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 263 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 264 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 265 266 PetscFunctionReturn(0); 267 #endif 268 } 269 270 /* 271 Factorization code for AIJ format. 272 */ 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 275 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 276 { 277 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 278 IS isicol; 279 int *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j; 280 int *ainew,*ajnew,jmax,*fill,*ajtmp,nz,shift = a->indexshift; 281 int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 282 PetscReal f; 283 284 PetscFunctionBegin; 285 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 286 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 287 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 288 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 289 290 /* get new row pointers */ 291 ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 292 ainew[0] = -shift; 293 /* don't know how many column pointers are needed so estimate */ 294 f = info->fill; 295 jmax = (int)(f*ai[n]+(!shift)); 296 ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 297 /* fill is a linked list of nonzeros in active row */ 298 ierr = PetscMalloc((2*n+1)*sizeof(int),&fill);CHKERRQ(ierr); 299 im = fill + n + 1; 300 /* idnew is location of diagonal in factor */ 301 ierr = PetscMalloc((n+1)*sizeof(int),&idnew);CHKERRQ(ierr); 302 idnew[0] = -shift; 303 304 for (i=0; i<n; i++) { 305 /* first copy previous fill into linked list */ 306 nnz = nz = ai[r[i]+1] - ai[r[i]]; 307 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 308 ajtmp = aj + ai[r[i]] + shift; 309 fill[n] = n; 310 while (nz--) { 311 fm = n; 312 idx = ic[*ajtmp++ + shift]; 313 do { 314 m = fm; 315 fm = fill[m]; 316 } while (fm < idx); 317 fill[m] = idx; 318 fill[idx] = fm; 319 } 320 row = fill[n]; 321 while (row < i) { 322 ajtmp = ajnew + idnew[row] + (!shift); 323 nzbd = 1 + idnew[row] - ainew[row]; 324 nz = im[row] - nzbd; 325 fm = row; 326 while (nz-- > 0) { 327 idx = *ajtmp++ + shift; 328 nzbd++; 329 if (idx == i) im[row] = nzbd; 330 do { 331 m = fm; 332 fm = fill[m]; 333 } while (fm < idx); 334 if (fm != idx) { 335 fill[m] = idx; 336 fill[idx] = fm; 337 fm = idx; 338 nnz++; 339 } 340 } 341 row = fill[row]; 342 } 343 /* copy new filled row into permanent storage */ 344 ainew[i+1] = ainew[i] + nnz; 345 if (ainew[i+1] > jmax) { 346 347 /* estimate how much additional space we will need */ 348 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 349 /* just double the memory each time */ 350 int maxadd = jmax; 351 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 352 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 353 jmax += maxadd; 354 355 /* allocate a longer ajnew */ 356 ierr = PetscMalloc(jmax*sizeof(int),&ajtmp);CHKERRQ(ierr); 357 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 358 ierr = PetscFree(ajnew);CHKERRQ(ierr); 359 ajnew = ajtmp; 360 realloc++; /* count how many times we realloc */ 361 } 362 ajtmp = ajnew + ainew[i] + shift; 363 fm = fill[n]; 364 nzi = 0; 365 im[i] = nnz; 366 while (nnz--) { 367 if (fm < i) nzi++; 368 *ajtmp++ = fm - shift; 369 fm = fill[fm]; 370 } 371 idnew[i] = ainew[i] + nzi; 372 } 373 if (ai[n] != 0) { 374 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 375 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 376 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 377 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 378 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 379 } else { 380 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 381 } 382 383 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 384 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 385 386 ierr = PetscFree(fill);CHKERRQ(ierr); 387 388 /* put together the new matrix */ 389 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 390 PetscLogObjectParent(*B,isicol); 391 b = (Mat_SeqAIJ*)(*B)->data; 392 ierr = PetscFree(b->imax);CHKERRQ(ierr); 393 b->singlemalloc = PETSC_FALSE; 394 /* the next line frees the default space generated by the Create() */ 395 ierr = PetscFree(b->a);CHKERRQ(ierr); 396 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 397 ierr = PetscMalloc((ainew[n]+shift+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 398 b->j = ajnew; 399 b->i = ainew; 400 b->diag = idnew; 401 b->ilen = 0; 402 b->imax = 0; 403 b->row = isrow; 404 b->col = iscol; 405 b->lu_damping = info->damping; 406 b->lu_zeropivot = info->zeropivot; 407 b->lu_shift = info->lu_shift; 408 b->lu_shift_fraction = info->lu_shift_fraction; 409 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 410 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 411 b->icol = isicol; 412 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 413 /* In b structure: Free imax, ilen, old a, old j. 414 Allocate idnew, solve_work, new a, new j */ 415 PetscLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(PetscScalar))); 416 b->maxnz = b->nz = ainew[n] + shift; 417 418 (*B)->factor = FACTOR_LU; 419 (*B)->info.factor_mallocs = realloc; 420 (*B)->info.fill_ratio_given = f; 421 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 422 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 423 424 if (ai[n] != 0) { 425 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 426 } else { 427 (*B)->info.fill_ratio_needed = 0.0; 428 } 429 PetscFunctionReturn(0); 430 } 431 /* ----------------------------------------------------------- */ 432 EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth); 433 434 #undef __FUNCT__ 435 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 436 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 437 { 438 Mat C = *B; 439 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 440 IS isrow = b->row,isicol = b->icol; 441 int *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j; 442 int *ajtmpold,*ajtmp,nz,row,*ics,shift = a->indexshift; 443 int *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0; 444 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 445 PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d; 446 PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.; 447 PetscTruth damp,lushift; 448 449 PetscFunctionBegin; 450 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 451 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 452 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 453 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 454 rtmps = rtmp + shift; ics = ic + shift; 455 456 if (!a->diag) { 457 ierr = MatMarkDiagonal_SeqAIJ(A); CHKERRQ(ierr); 458 } 459 460 if (b->lu_shift) { /* set max shift */ 461 int *aai = a->i,*ddiag = a->diag; 462 shift_top = 0; 463 for (i=0; i<n; i++) { 464 d = PetscAbsScalar((a->a)[ddiag[i]+shift]); 465 /* calculate amt of shift needed for this row */ 466 if (d<0) row_shift = 0; else row_shift = -2*PetscAbsScalar(d); 467 v = a->a+aai[i]+shift; 468 for (j=0; j<aai[i+1]-aai[i]; j++) 469 row_shift += PetscAbsScalar(v[j]); 470 if (row_shift>shift_top) shift_top = row_shift; 471 } 472 } 473 474 shift_fraction = 0; shift_amount = 0; 475 do { 476 damp = PETSC_FALSE; 477 lushift = PETSC_FALSE; 478 for (i=0; i<n; i++) { 479 nz = ai[i+1] - ai[i]; 480 ajtmp = aj + ai[i] + shift; 481 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 482 483 /* load in initial (unfactored row) */ 484 nz = a->i[r[i]+1] - a->i[r[i]]; 485 ajtmpold = a->j + a->i[r[i]] + shift; 486 v = a->a + a->i[r[i]] + shift; 487 for (j=0; j<nz; j++) { 488 rtmp[ics[ajtmpold[j]]] = v[j]; 489 if ( (ndamp||nshift) && ajtmpold[j] == r[i]) { /* damp the diagonal of the matrix */ 490 rtmp[ics[ajtmpold[j]]] += damping + shift_amount; 491 } 492 } 493 494 row = *ajtmp++ + shift; 495 while (row < i) { 496 pc = rtmp + row; 497 if (*pc != 0.0) { 498 pv = b->a + diag_offset[row] + shift; 499 pj = b->j + diag_offset[row] + (!shift); 500 multiplier = *pc / *pv++; 501 *pc = multiplier; 502 nz = ai[row+1] - diag_offset[row] - 1; 503 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 504 PetscLogFlops(2*nz); 505 } 506 row = *ajtmp++ + shift; 507 } 508 /* finished row so stick it into b->a */ 509 pv = b->a + ai[i] + shift; 510 pj = b->j + ai[i] + shift; 511 nz = ai[i+1] - ai[i]; 512 diag = diag_offset[i] - ai[i]; 513 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 514 rs = 0.0; 515 for (j=0; j<nz; j++) { 516 pv[j] = rtmps[pj[j]]; 517 if (j != diag) rs += PetscAbsScalar(pv[j]); 518 } 519 #define MAX_NSHIFT 5 520 if (PetscRealPart(pv[diag]) < zeropivot*rs && b->lu_shift) { 521 if (nshift>MAX_NSHIFT) { 522 SETERRQ(1,"Unable to determine shift to enforce positive definite preconditioner"); 523 } else if (nshift==MAX_NSHIFT) { 524 shift_fraction = shift_hi; 525 } else { 526 shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 527 } 528 shift_amount = shift_fraction * shift_top; 529 lushift = PETSC_TRUE; 530 nshift++; 531 break; 532 } 533 if (PetscAbsScalar(pv[diag]) < zeropivot*rs) { 534 if (damping) { 535 if (ndamp) damping *= 2.0; 536 damp = PETSC_TRUE; 537 ndamp++; 538 break; 539 } else { 540 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 541 } 542 } 543 } 544 if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) { 545 /* 546 * if not already shifting up & shifting & started shifting & can refine, 547 * then try lower shift 548 */ 549 shift_hi = shift_fraction; 550 shift_fraction = (shift_hi+shift_lo)/2.; 551 shift_amount = shift_fraction * shift_top; 552 lushift = PETSC_TRUE; 553 nshift++; 554 } 555 } while (damp || lushift); 556 557 /* invert diagonal entries for simplier triangular solves */ 558 for (i=0; i<n; i++) { 559 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 560 } 561 562 ierr = PetscFree(rtmp);CHKERRQ(ierr); 563 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 564 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 565 C->factor = FACTOR_LU; 566 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 567 C->assembled = PETSC_TRUE; 568 PetscLogFlops(C->n); 569 if (ndamp) { 570 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %g\n",ndamp,damping); 571 } 572 if (nshift) { 573 b->lu_shift_fraction = shift_fraction; 574 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction\n",shift_fraction); 575 } 576 PetscFunctionReturn(0); 577 } 578 /* ----------------------------------------------------------- */ 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatLUFactor_SeqAIJ" 581 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 582 { 583 int ierr; 584 Mat C; 585 586 PetscFunctionBegin; 587 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 588 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 589 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 590 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 591 PetscFunctionReturn(0); 592 } 593 /* ----------------------------------------------------------- */ 594 #undef __FUNCT__ 595 #define __FUNCT__ "MatSolve_SeqAIJ" 596 int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 597 { 598 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 599 IS iscol = a->col,isrow = a->row; 600 int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 601 int nz,shift = a->indexshift,*rout,*cout; 602 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 603 604 PetscFunctionBegin; 605 if (!n) PetscFunctionReturn(0); 606 607 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 608 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 609 tmp = a->solve_work; 610 611 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 612 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 613 614 /* forward solve the lower triangular */ 615 tmp[0] = b[*r++]; 616 tmps = tmp + shift; 617 for (i=1; i<n; i++) { 618 v = aa + ai[i] + shift; 619 vi = aj + ai[i] + shift; 620 nz = a->diag[i] - ai[i]; 621 sum = b[*r++]; 622 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 623 tmp[i] = sum; 624 } 625 626 /* backward solve the upper triangular */ 627 for (i=n-1; i>=0; i--){ 628 v = aa + a->diag[i] + (!shift); 629 vi = aj + a->diag[i] + (!shift); 630 nz = ai[i+1] - a->diag[i] - 1; 631 sum = tmp[i]; 632 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 633 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 634 } 635 636 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 637 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 638 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 639 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 640 PetscLogFlops(2*a->nz - A->n); 641 PetscFunctionReturn(0); 642 } 643 644 /* ----------------------------------------------------------- */ 645 #undef __FUNCT__ 646 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 647 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 648 { 649 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 650 int n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr; 651 PetscScalar *x,*b,*aa = a->a; 652 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 653 int adiag_i,i,*vi,nz,ai_i; 654 PetscScalar *v,sum; 655 #endif 656 657 PetscFunctionBegin; 658 if (!n) PetscFunctionReturn(0); 659 if (a->indexshift) { 660 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 661 PetscFunctionReturn(0); 662 } 663 664 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 665 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 666 667 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 668 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 669 #else 670 /* forward solve the lower triangular */ 671 x[0] = b[0]; 672 for (i=1; i<n; i++) { 673 ai_i = ai[i]; 674 v = aa + ai_i; 675 vi = aj + ai_i; 676 nz = adiag[i] - ai_i; 677 sum = b[i]; 678 while (nz--) sum -= *v++ * x[*vi++]; 679 x[i] = sum; 680 } 681 682 /* backward solve the upper triangular */ 683 for (i=n-1; i>=0; i--){ 684 adiag_i = adiag[i]; 685 v = aa + adiag_i + 1; 686 vi = aj + adiag_i + 1; 687 nz = ai[i+1] - adiag_i - 1; 688 sum = x[i]; 689 while (nz--) sum -= *v++ * x[*vi++]; 690 x[i] = sum*aa[adiag_i]; 691 } 692 #endif 693 PetscLogFlops(2*a->nz - A->n); 694 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 695 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 701 int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 702 { 703 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 704 IS iscol = a->col,isrow = a->row; 705 int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 706 int nz,shift = a->indexshift,*rout,*cout; 707 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 708 709 PetscFunctionBegin; 710 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 711 712 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 713 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 714 tmp = a->solve_work; 715 716 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 717 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 718 719 /* forward solve the lower triangular */ 720 tmp[0] = b[*r++]; 721 for (i=1; i<n; i++) { 722 v = aa + ai[i] + shift; 723 vi = aj + ai[i] + shift; 724 nz = a->diag[i] - ai[i]; 725 sum = b[*r++]; 726 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 727 tmp[i] = sum; 728 } 729 730 /* backward solve the upper triangular */ 731 for (i=n-1; i>=0; i--){ 732 v = aa + a->diag[i] + (!shift); 733 vi = aj + a->diag[i] + (!shift); 734 nz = ai[i+1] - a->diag[i] - 1; 735 sum = tmp[i]; 736 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 737 tmp[i] = sum*aa[a->diag[i]+shift]; 738 x[*c--] += tmp[i]; 739 } 740 741 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 742 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 743 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 744 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 745 PetscLogFlops(2*a->nz); 746 747 PetscFunctionReturn(0); 748 } 749 /* -------------------------------------------------------------------*/ 750 #undef __FUNCT__ 751 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 752 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 753 { 754 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 755 IS iscol = a->col,isrow = a->row; 756 int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 757 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 758 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 759 760 PetscFunctionBegin; 761 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 762 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 763 tmp = a->solve_work; 764 765 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 766 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 767 768 /* copy the b into temp work space according to permutation */ 769 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 770 771 /* forward solve the U^T */ 772 for (i=0; i<n; i++) { 773 v = aa + diag[i] + shift; 774 vi = aj + diag[i] + (!shift); 775 nz = ai[i+1] - diag[i] - 1; 776 s1 = tmp[i]; 777 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 778 while (nz--) { 779 tmp[*vi++ + shift] -= (*v++)*s1; 780 } 781 tmp[i] = s1; 782 } 783 784 /* backward solve the L^T */ 785 for (i=n-1; i>=0; i--){ 786 v = aa + diag[i] - 1 + shift; 787 vi = aj + diag[i] - 1 + shift; 788 nz = diag[i] - ai[i]; 789 s1 = tmp[i]; 790 while (nz--) { 791 tmp[*vi-- + shift] -= (*v--)*s1; 792 } 793 } 794 795 /* copy tmp into x according to permutation */ 796 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 797 798 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 799 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 800 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 801 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 802 803 PetscLogFlops(2*a->nz-A->n); 804 PetscFunctionReturn(0); 805 } 806 807 #undef __FUNCT__ 808 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 809 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 810 { 811 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 812 IS iscol = a->col,isrow = a->row; 813 int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 814 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 815 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 816 817 PetscFunctionBegin; 818 if (zz != xx) VecCopy(zz,xx); 819 820 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 821 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 822 tmp = a->solve_work; 823 824 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 825 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 826 827 /* copy the b into temp work space according to permutation */ 828 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 829 830 /* forward solve the U^T */ 831 for (i=0; i<n; i++) { 832 v = aa + diag[i] + shift; 833 vi = aj + diag[i] + (!shift); 834 nz = ai[i+1] - diag[i] - 1; 835 tmp[i] *= *v++; 836 while (nz--) { 837 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 838 } 839 } 840 841 /* backward solve the L^T */ 842 for (i=n-1; i>=0; i--){ 843 v = aa + diag[i] - 1 + shift; 844 vi = aj + diag[i] - 1 + shift; 845 nz = diag[i] - ai[i]; 846 while (nz--) { 847 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 848 } 849 } 850 851 /* copy tmp into x according to permutation */ 852 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 853 854 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 855 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 856 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 857 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 858 859 PetscLogFlops(2*a->nz); 860 PetscFunctionReturn(0); 861 } 862 /* ----------------------------------------------------------------*/ 863 EXTERN int MatMissingDiagonal_SeqAIJ(Mat); 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 867 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 868 { 869 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 870 IS isicol; 871 int *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j; 872 int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 873 int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0; 874 int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 875 PetscTruth col_identity,row_identity; 876 PetscReal f; 877 878 PetscFunctionBegin; 879 f = info->fill; 880 levels = (int)info->levels; 881 diagonal_fill = (int)info->diagonal_fill; 882 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 883 884 /* special case that simply copies fill pattern */ 885 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 886 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 887 if (!levels && row_identity && col_identity) { 888 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 889 (*fact)->factor = FACTOR_LU; 890 b = (Mat_SeqAIJ*)(*fact)->data; 891 if (!b->diag) { 892 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 893 } 894 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 895 b->row = isrow; 896 b->col = iscol; 897 b->icol = isicol; 898 b->lu_damping = info->damping; 899 b->lu_zeropivot = info->zeropivot; 900 b->lu_shift = info->lu_shift; 901 b->lu_shift_fraction= info->lu_shift_fraction; 902 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 903 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 904 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 905 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 906 PetscFunctionReturn(0); 907 } 908 909 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 910 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 911 912 /* get new row pointers */ 913 ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 914 ainew[0] = -shift; 915 /* don't know how many column pointers are needed so estimate */ 916 jmax = (int)(f*(ai[n]+!shift)); 917 ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 918 /* ajfill is level of fill for each fill entry */ 919 ierr = PetscMalloc((jmax)*sizeof(int),&ajfill);CHKERRQ(ierr); 920 /* fill is a linked list of nonzeros in active row */ 921 ierr = PetscMalloc((n+1)*sizeof(int),&fill);CHKERRQ(ierr); 922 /* im is level for each filled value */ 923 ierr = PetscMalloc((n+1)*sizeof(int),&im);CHKERRQ(ierr); 924 /* dloc is location of diagonal in factor */ 925 ierr = PetscMalloc((n+1)*sizeof(int),&dloc);CHKERRQ(ierr); 926 dloc[0] = 0; 927 for (prow=0; prow<n; prow++) { 928 929 /* copy current row into linked list */ 930 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 931 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 932 xi = aj + ai[r[prow]] + shift; 933 fill[n] = n; 934 fill[prow] = -1; /* marker to indicate if diagonal exists */ 935 while (nz--) { 936 fm = n; 937 idx = ic[*xi++ + shift]; 938 do { 939 m = fm; 940 fm = fill[m]; 941 } while (fm < idx); 942 fill[m] = idx; 943 fill[idx] = fm; 944 im[idx] = 0; 945 } 946 947 /* make sure diagonal entry is included */ 948 if (diagonal_fill && fill[prow] == -1) { 949 fm = n; 950 while (fill[fm] < prow) fm = fill[fm]; 951 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 952 fill[fm] = prow; 953 im[prow] = 0; 954 nzf++; 955 dcount++; 956 } 957 958 nzi = 0; 959 row = fill[n]; 960 while (row < prow) { 961 incrlev = im[row] + 1; 962 nz = dloc[row]; 963 xi = ajnew + ainew[row] + shift + nz + 1; 964 flev = ajfill + ainew[row] + shift + nz + 1; 965 nnz = ainew[row+1] - ainew[row] - nz - 1; 966 fm = row; 967 while (nnz-- > 0) { 968 idx = *xi++ + shift; 969 if (*flev + incrlev > levels) { 970 flev++; 971 continue; 972 } 973 do { 974 m = fm; 975 fm = fill[m]; 976 } while (fm < idx); 977 if (fm != idx) { 978 im[idx] = *flev + incrlev; 979 fill[m] = idx; 980 fill[idx] = fm; 981 fm = idx; 982 nzf++; 983 } else { 984 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 985 } 986 flev++; 987 } 988 row = fill[row]; 989 nzi++; 990 } 991 /* copy new filled row into permanent storage */ 992 ainew[prow+1] = ainew[prow] + nzf; 993 if (ainew[prow+1] > jmax-shift) { 994 995 /* estimate how much additional space we will need */ 996 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 997 /* just double the memory each time */ 998 /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 999 int maxadd = jmax; 1000 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1001 jmax += maxadd; 1002 1003 /* allocate a longer ajnew and ajfill */ 1004 ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 1005 ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1006 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1007 ajnew = xi; 1008 ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 1009 ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1010 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1011 ajfill = xi; 1012 realloc++; /* count how many times we realloc */ 1013 } 1014 xi = ajnew + ainew[prow] + shift; 1015 flev = ajfill + ainew[prow] + shift; 1016 dloc[prow] = nzi; 1017 fm = fill[n]; 1018 while (nzf--) { 1019 *xi++ = fm - shift; 1020 *flev++ = im[fm]; 1021 fm = fill[fm]; 1022 } 1023 /* make sure row has diagonal entry */ 1024 if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 1025 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrix\n\ 1026 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1027 } 1028 } 1029 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1030 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1031 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1032 ierr = PetscFree(fill);CHKERRQ(ierr); 1033 ierr = PetscFree(im);CHKERRQ(ierr); 1034 1035 { 1036 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1037 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1038 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1039 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1040 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1041 if (diagonal_fill) { 1042 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount); 1043 } 1044 } 1045 1046 /* put together the new matrix */ 1047 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1048 PetscLogObjectParent(*fact,isicol); 1049 b = (Mat_SeqAIJ*)(*fact)->data; 1050 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1051 b->singlemalloc = PETSC_FALSE; 1052 len = (ainew[n] + shift)*sizeof(PetscScalar); 1053 /* the next line frees the default space generated by the Create() */ 1054 ierr = PetscFree(b->a);CHKERRQ(ierr); 1055 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1056 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1057 b->j = ajnew; 1058 b->i = ainew; 1059 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1060 b->diag = dloc; 1061 b->ilen = 0; 1062 b->imax = 0; 1063 b->row = isrow; 1064 b->col = iscol; 1065 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1066 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1067 b->icol = isicol; 1068 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1069 /* In b structure: Free imax, ilen, old a, old j. 1070 Allocate dloc, solve_work, new a, new j */ 1071 PetscLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(PetscScalar))); 1072 b->maxnz = b->nz = ainew[n] + shift; 1073 b->lu_damping = info->damping; 1074 b->lu_zeropivot = info->zeropivot; 1075 (*fact)->factor = FACTOR_LU; 1076 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1077 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1078 1079 (*fact)->info.factor_mallocs = realloc; 1080 (*fact)->info.fill_ratio_given = f; 1081 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1082 PetscFunctionReturn(0); 1083 } 1084 1085 #include "src/mat/impls/sbaij/seq/sbaij.h" 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1088 int MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact) 1089 { 1090 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1091 int ierr; 1092 1093 PetscFunctionBegin; 1094 if (!a->sbaijMat){ 1095 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1096 } 1097 1098 ierr = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(a->sbaijMat,fact);CHKERRQ(ierr); 1099 ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 1100 a->sbaijMat = PETSC_NULL; 1101 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNCT__ 1106 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1107 int MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1108 { 1109 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1110 int ierr; 1111 PetscTruth perm_identity; 1112 1113 PetscFunctionBegin; 1114 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1115 if (!perm_identity){ 1116 SETERRQ(1,"Non-identity permutation is not supported yet"); 1117 } 1118 if (!a->sbaijMat){ 1119 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1120 } 1121 1122 ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1123 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1124 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1130 int MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1131 { 1132 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1133 int ierr; 1134 PetscTruth perm_identity; 1135 1136 PetscFunctionBegin; 1137 printf(" MatCholeskyFactorSymbolic_SeqAIJ is called ..\n"); 1138 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1139 if (!perm_identity){ 1140 SETERRQ(1,"Non-identity permutation is not supported yet"); 1141 } 1142 if (!a->sbaijMat){ 1143 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1144 } 1145 1146 ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1147 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1148 1149 PetscFunctionReturn(0); 1150 } 1151