1 /*$Id: aijfact.c,v 1.167 2001/09/11 16:32:26 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 #include "src/inline/spops.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 10 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 11 { 12 PetscFunctionBegin; 13 14 SETERRQ(PETSC_ERR_SUP,"Code not written"); 15 #if !defined(PETSC_USE_DEBUG) 16 PetscFunctionReturn(0); 17 #endif 18 } 19 20 21 EXTERN int MatMarkDiagonal_SeqAIJ(Mat); 22 EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth); 23 24 EXTERN int SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*); 25 EXTERN int SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*); 26 EXTERN int SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*); 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ishift = 0, for indices start at 1 50 ishift = 1, for indices starting at 0 51 ------------------------------------------------------------ 52 */ 53 int MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 54 { 55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 56 PetscFunctionBegin; 57 SETERRQ(1,"This distribution does not include GNU Copyright code\n\ 58 You can obtain the drop tolerance routines by installing PETSc from\n\ 59 www.mcs.anl.gov/petsc\n"); 60 #else 61 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 62 IS iscolf,isicol,isirow; 63 PetscTruth reorder; 64 int *c,*r,*ic,ierr,i,n = A->m; 65 int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 66 int *ordcol,*iwk,*iperm,*jw; 67 int ishift = !a->indexshift; 68 int jmax,lfill,job,*o_i,*o_j; 69 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 70 PetscReal permtol,af; 71 72 PetscFunctionBegin; 73 74 if (info->dt == PETSC_DEFAULT) info->dt = .005; 75 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax); 76 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 77 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 78 lfill = (int)(info->dtcount/2.0); 79 jmax = (int)(info->fill*a->nz); 80 permtol = info->dtcol; 81 82 83 /* ------------------------------------------------------------ 84 If reorder=.TRUE., then the original matrix has to be 85 reordered to reflect the user selected ordering scheme, and 86 then de-reordered so it is in it's original format. 87 Because Saad's dperm() is NOT in place, we have to copy 88 the original matrix and allocate more storage. . . 89 ------------------------------------------------------------ 90 */ 91 92 /* set reorder to true if either isrow or iscol is not identity */ 93 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 94 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 95 reorder = PetscNot(reorder); 96 97 98 /* storage for ilu factor */ 99 ierr = PetscMalloc((n+1)*sizeof(int),&new_i);CHKERRQ(ierr); 100 ierr = PetscMalloc(jmax*sizeof(int),&new_j);CHKERRQ(ierr); 101 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 102 ierr = PetscMalloc(n*sizeof(int),&ordcol);CHKERRQ(ierr); 103 104 /* ------------------------------------------------------------ 105 Make sure that everything is Fortran formatted (1-Based) 106 ------------------------------------------------------------ 107 */ 108 if (ishift) { 109 for (i=old_i[0];i<old_i[n];i++) { 110 old_j[i]++; 111 } 112 for(i=0;i<n+1;i++) { 113 old_i[i]++; 114 }; 115 }; 116 117 if (reorder) { 118 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 119 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 120 for(i=0;i<n;i++) { 121 r[i] = r[i]+1; 122 c[i] = c[i]+1; 123 } 124 ierr = PetscMalloc((n+1)*sizeof(int),&old_i2);CHKERRQ(ierr); 125 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);CHKERRQ(ierr); 126 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 127 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 128 for (i=0;i<n;i++) { 129 r[i] = r[i]-1; 130 c[i] = c[i]-1; 131 } 132 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 133 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 134 o_a = old_a2; 135 o_j = old_j2; 136 o_i = old_i2; 137 } else { 138 o_a = old_a; 139 o_j = old_j; 140 o_i = old_i; 141 } 142 143 /* ------------------------------------------------------------ 144 Call Saad's ilutp() routine to generate the factorization 145 ------------------------------------------------------------ 146 */ 147 148 ierr = PetscMalloc(2*n*sizeof(int),&iperm);CHKERRQ(ierr); 149 ierr = PetscMalloc(2*n*sizeof(int),&jw);CHKERRQ(ierr); 150 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 151 152 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 153 if (ierr) { 154 switch (ierr) { 155 case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 156 case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 157 case -5: SETERRQ(1,"ilutp(), zero row encountered"); 158 case -1: SETERRQ(1,"ilutp(), input matrix may be wrong"); 159 case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax); 160 default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr); 161 } 162 } 163 164 ierr = PetscFree(w);CHKERRQ(ierr); 165 ierr = PetscFree(jw);CHKERRQ(ierr); 166 167 /* ------------------------------------------------------------ 168 Saad's routine gives the result in Modified Sparse Row (msr) 169 Convert to Compressed Sparse Row format (csr) 170 ------------------------------------------------------------ 171 */ 172 173 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 174 ierr = PetscMalloc((n+1)*sizeof(int),&iwk);CHKERRQ(ierr); 175 176 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 177 178 ierr = PetscFree(iwk);CHKERRQ(ierr); 179 ierr = PetscFree(wk);CHKERRQ(ierr); 180 181 if (reorder) { 182 ierr = PetscFree(old_a2);CHKERRQ(ierr); 183 ierr = PetscFree(old_j2);CHKERRQ(ierr); 184 ierr = PetscFree(old_i2);CHKERRQ(ierr); 185 } else { 186 /* fix permutation of old_j that the factorization introduced */ 187 for (i=old_i[0]; i<old_i[n]; i++) { 188 old_j[i-1] = iperm[old_j[i-1]-1]; 189 } 190 } 191 192 /* get rid of the shift to indices starting at 1 */ 193 if (ishift) { 194 for (i=0; i<n+1; i++) { 195 old_i[i]--; 196 } 197 for (i=old_i[0];i<old_i[n];i++) { 198 old_j[i]--; 199 } 200 } 201 202 /* Make the factored matrix 0-based */ 203 if (ishift) { 204 for (i=0; i<n+1; i++) { 205 new_i[i]--; 206 } 207 for (i=new_i[0];i<new_i[n];i++) { 208 new_j[i]--; 209 } 210 } 211 212 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 213 /*-- permute the right-hand-side and solution vectors --*/ 214 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 215 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 216 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 217 for(i=0; i<n; i++) { 218 ordcol[i] = ic[iperm[i]-1]; 219 }; 220 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 221 ierr = ISDestroy(isicol);CHKERRQ(ierr); 222 223 ierr = PetscFree(iperm);CHKERRQ(ierr); 224 225 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 226 ierr = PetscFree(ordcol);CHKERRQ(ierr); 227 228 /*----- put together the new matrix -----*/ 229 230 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 231 (*fact)->factor = FACTOR_LU; 232 (*fact)->assembled = PETSC_TRUE; 233 234 b = (Mat_SeqAIJ*)(*fact)->data; 235 ierr = PetscFree(b->imax);CHKERRQ(ierr); 236 b->sorted = PETSC_FALSE; 237 b->singlemalloc = PETSC_FALSE; 238 /* the next line frees the default space generated by the MatCreate() */ 239 ierr = PetscFree(b->a);CHKERRQ(ierr); 240 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 241 b->a = new_a; 242 b->j = new_j; 243 b->i = new_i; 244 b->ilen = 0; 245 b->imax = 0; 246 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 247 b->row = isirow; 248 b->col = iscolf; 249 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 250 b->maxnz = b->nz = new_i[n]; 251 b->indexshift = a->indexshift; 252 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 253 (*fact)->info.factor_mallocs = 0; 254 255 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 256 257 /* check out for identical nodes. If found, use inode functions */ 258 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 259 260 af = ((double)b->nz)/((double)a->nz) + .001; 261 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 262 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 263 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 264 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 265 266 PetscFunctionReturn(0); 267 #endif 268 } 269 270 /* 271 Factorization code for AIJ format. 272 */ 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 275 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 276 { 277 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 278 IS isicol; 279 int *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j; 280 int *ainew,*ajnew,jmax,*fill,*ajtmp,nz,shift = a->indexshift; 281 int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 282 PetscReal f; 283 284 PetscFunctionBegin; 285 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 286 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 287 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 288 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 289 290 /* get new row pointers */ 291 ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 292 ainew[0] = -shift; 293 /* don't know how many column pointers are needed so estimate */ 294 f = info->fill; 295 jmax = (int)(f*ai[n]+(!shift)); 296 ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 297 /* fill is a linked list of nonzeros in active row */ 298 ierr = PetscMalloc((2*n+1)*sizeof(int),&fill);CHKERRQ(ierr); 299 im = fill + n + 1; 300 /* idnew is location of diagonal in factor */ 301 ierr = PetscMalloc((n+1)*sizeof(int),&idnew);CHKERRQ(ierr); 302 idnew[0] = -shift; 303 304 for (i=0; i<n; i++) { 305 /* first copy previous fill into linked list */ 306 nnz = nz = ai[r[i]+1] - ai[r[i]]; 307 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 308 ajtmp = aj + ai[r[i]] + shift; 309 fill[n] = n; 310 while (nz--) { 311 fm = n; 312 idx = ic[*ajtmp++ + shift]; 313 do { 314 m = fm; 315 fm = fill[m]; 316 } while (fm < idx); 317 fill[m] = idx; 318 fill[idx] = fm; 319 } 320 row = fill[n]; 321 while (row < i) { 322 ajtmp = ajnew + idnew[row] + (!shift); 323 nzbd = 1 + idnew[row] - ainew[row]; 324 nz = im[row] - nzbd; 325 fm = row; 326 while (nz-- > 0) { 327 idx = *ajtmp++ + shift; 328 nzbd++; 329 if (idx == i) im[row] = nzbd; 330 do { 331 m = fm; 332 fm = fill[m]; 333 } while (fm < idx); 334 if (fm != idx) { 335 fill[m] = idx; 336 fill[idx] = fm; 337 fm = idx; 338 nnz++; 339 } 340 } 341 row = fill[row]; 342 } 343 /* copy new filled row into permanent storage */ 344 ainew[i+1] = ainew[i] + nnz; 345 if (ainew[i+1] > jmax) { 346 347 /* estimate how much additional space we will need */ 348 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 349 /* just double the memory each time */ 350 int maxadd = jmax; 351 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 352 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 353 jmax += maxadd; 354 355 /* allocate a longer ajnew */ 356 ierr = PetscMalloc(jmax*sizeof(int),&ajtmp);CHKERRQ(ierr); 357 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 358 ierr = PetscFree(ajnew);CHKERRQ(ierr); 359 ajnew = ajtmp; 360 realloc++; /* count how many times we realloc */ 361 } 362 ajtmp = ajnew + ainew[i] + shift; 363 fm = fill[n]; 364 nzi = 0; 365 im[i] = nnz; 366 while (nnz--) { 367 if (fm < i) nzi++; 368 *ajtmp++ = fm - shift; 369 fm = fill[fm]; 370 } 371 idnew[i] = ainew[i] + nzi; 372 } 373 if (ai[n] != 0) { 374 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 375 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 376 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 377 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 378 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 379 } else { 380 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 381 } 382 383 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 384 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 385 386 ierr = PetscFree(fill);CHKERRQ(ierr); 387 388 /* put together the new matrix */ 389 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 390 PetscLogObjectParent(*B,isicol); 391 b = (Mat_SeqAIJ*)(*B)->data; 392 ierr = PetscFree(b->imax);CHKERRQ(ierr); 393 b->singlemalloc = PETSC_FALSE; 394 /* the next line frees the default space generated by the Create() */ 395 ierr = PetscFree(b->a);CHKERRQ(ierr); 396 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 397 ierr = PetscMalloc((ainew[n]+shift+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 398 b->j = ajnew; 399 b->i = ainew; 400 b->diag = idnew; 401 b->ilen = 0; 402 b->imax = 0; 403 b->row = isrow; 404 b->col = iscol; 405 b->lu_damping = info->damping; 406 b->lu_zeropivot = info->zeropivot; 407 b->lu_shift = info->lu_shift; 408 b->lu_shift_fraction = info->lu_shift_fraction; 409 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 410 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 411 b->icol = isicol; 412 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 413 /* In b structure: Free imax, ilen, old a, old j. 414 Allocate idnew, solve_work, new a, new j */ 415 PetscLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(PetscScalar))); 416 b->maxnz = b->nz = ainew[n] + shift; 417 418 (*B)->factor = FACTOR_LU; 419 (*B)->info.factor_mallocs = realloc; 420 (*B)->info.fill_ratio_given = f; 421 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 422 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 423 424 if (ai[n] != 0) { 425 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 426 } else { 427 (*B)->info.fill_ratio_needed = 0.0; 428 } 429 PetscFunctionReturn(0); 430 } 431 /* ----------------------------------------------------------- */ 432 EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth); 433 434 #undef __FUNCT__ 435 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 436 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 437 { 438 Mat C = *B; 439 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 440 IS isrow = b->row,isicol = b->icol; 441 int *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j; 442 int *ajtmpold,*ajtmp,nz,row,*ics,shift = a->indexshift; 443 int *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0; 444 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 445 PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d; 446 PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.; 447 PetscTruth damp,lushift; 448 449 PetscFunctionBegin; 450 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 451 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 452 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 453 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 454 rtmps = rtmp + shift; ics = ic + shift; 455 456 if (!a->diag) { 457 ierr = MatMarkDiagonal_SeqAIJ(A); CHKERRQ(ierr); 458 } 459 460 if (b->lu_shift) { /* set max shift */ 461 int *aai = a->i,*ddiag = a->diag; 462 shift_top = 0; 463 for (i=0; i<n; i++) { 464 d = PetscAbsScalar((a->a)[ddiag[i]+shift]); 465 /* calculate amt of shift needed for this row */ 466 if (d<0) { 467 row_shift = 0; 468 } else { 469 row_shift = -2*d; 470 } 471 v = a->a+aai[i]+shift; 472 for (j=0; j<aai[i+1]-aai[i]; j++) 473 row_shift += PetscAbsScalar(v[j]); 474 if (row_shift>shift_top) shift_top = row_shift; 475 } 476 } 477 478 shift_fraction = 0; shift_amount = 0; 479 do { 480 damp = PETSC_FALSE; 481 lushift = PETSC_FALSE; 482 for (i=0; i<n; i++) { 483 nz = ai[i+1] - ai[i]; 484 ajtmp = aj + ai[i] + shift; 485 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 486 487 /* load in initial (unfactored row) */ 488 nz = a->i[r[i]+1] - a->i[r[i]]; 489 ajtmpold = a->j + a->i[r[i]] + shift; 490 v = a->a + a->i[r[i]] + shift; 491 for (j=0; j<nz; j++) { 492 rtmp[ics[ajtmpold[j]]] = v[j]; 493 } 494 rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */ 495 496 row = *ajtmp++ + shift; 497 while (row < i) { 498 pc = rtmp + row; 499 if (*pc != 0.0) { 500 pv = b->a + diag_offset[row] + shift; 501 pj = b->j + diag_offset[row] + (!shift); 502 multiplier = *pc / *pv++; 503 *pc = multiplier; 504 nz = ai[row+1] - diag_offset[row] - 1; 505 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 506 PetscLogFlops(2*nz); 507 } 508 row = *ajtmp++ + shift; 509 } 510 /* finished row so stick it into b->a */ 511 pv = b->a + ai[i] + shift; 512 pj = b->j + ai[i] + shift; 513 nz = ai[i+1] - ai[i]; 514 diag = diag_offset[i] - ai[i]; 515 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 516 rs = 0.0; 517 for (j=0; j<nz; j++) { 518 pv[j] = rtmps[pj[j]]; 519 if (j != diag) rs += PetscAbsScalar(pv[j]); 520 } 521 #define MAX_NSHIFT 5 522 if (PetscRealPart(pv[diag]) < zeropivot*rs && b->lu_shift) { 523 if (nshift>MAX_NSHIFT) { 524 SETERRQ(1,"Unable to determine shift to enforce positive definite preconditioner"); 525 } else if (nshift==MAX_NSHIFT) { 526 shift_fraction = shift_hi; 527 } else { 528 shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 529 } 530 shift_amount = shift_fraction * shift_top; 531 lushift = PETSC_TRUE; 532 nshift++; 533 break; 534 } 535 if (PetscAbsScalar(pv[diag]) < zeropivot*rs) { 536 if (damping) { 537 if (ndamp) damping *= 2.0; 538 damp = PETSC_TRUE; 539 ndamp++; 540 break; 541 } else { 542 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 543 } 544 } 545 } 546 if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) { 547 /* 548 * if not already shifting up & shifting & started shifting & can refine, 549 * then try lower shift 550 */ 551 shift_hi = shift_fraction; 552 shift_fraction = (shift_hi+shift_lo)/2.; 553 shift_amount = shift_fraction * shift_top; 554 lushift = PETSC_TRUE; 555 nshift++; 556 } 557 } while (damp || lushift); 558 559 /* invert diagonal entries for simplier triangular solves */ 560 for (i=0; i<n; i++) { 561 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 562 } 563 564 ierr = PetscFree(rtmp);CHKERRQ(ierr); 565 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 566 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 567 C->factor = FACTOR_LU; 568 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 569 C->assembled = PETSC_TRUE; 570 PetscLogFlops(C->n); 571 if (ndamp) { 572 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %g\n",ndamp,damping); 573 } 574 if (nshift) { 575 b->lu_shift_fraction = shift_fraction; 576 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction\n",shift_fraction); 577 } 578 PetscFunctionReturn(0); 579 } 580 /* ----------------------------------------------------------- */ 581 #undef __FUNCT__ 582 #define __FUNCT__ "MatLUFactor_SeqAIJ" 583 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 584 { 585 int ierr; 586 Mat C; 587 588 PetscFunctionBegin; 589 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 590 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 591 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 592 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 593 PetscFunctionReturn(0); 594 } 595 /* ----------------------------------------------------------- */ 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatSolve_SeqAIJ" 598 int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 599 { 600 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 601 IS iscol = a->col,isrow = a->row; 602 int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 603 int nz,shift = a->indexshift,*rout,*cout; 604 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 605 606 PetscFunctionBegin; 607 if (!n) PetscFunctionReturn(0); 608 609 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 610 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 611 tmp = a->solve_work; 612 613 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 614 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 615 616 /* forward solve the lower triangular */ 617 tmp[0] = b[*r++]; 618 tmps = tmp + shift; 619 for (i=1; i<n; i++) { 620 v = aa + ai[i] + shift; 621 vi = aj + ai[i] + shift; 622 nz = a->diag[i] - ai[i]; 623 sum = b[*r++]; 624 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 625 tmp[i] = sum; 626 } 627 628 /* backward solve the upper triangular */ 629 for (i=n-1; i>=0; i--){ 630 v = aa + a->diag[i] + (!shift); 631 vi = aj + a->diag[i] + (!shift); 632 nz = ai[i+1] - a->diag[i] - 1; 633 sum = tmp[i]; 634 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 635 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 636 } 637 638 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 639 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 640 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 641 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 642 PetscLogFlops(2*a->nz - A->n); 643 PetscFunctionReturn(0); 644 } 645 646 /* ----------------------------------------------------------- */ 647 #undef __FUNCT__ 648 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 649 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 650 { 651 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 652 int n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr; 653 PetscScalar *x,*b,*aa = a->a; 654 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 655 int adiag_i,i,*vi,nz,ai_i; 656 PetscScalar *v,sum; 657 #endif 658 659 PetscFunctionBegin; 660 if (!n) PetscFunctionReturn(0); 661 if (a->indexshift) { 662 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 666 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 667 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 668 669 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 670 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 671 #else 672 /* forward solve the lower triangular */ 673 x[0] = b[0]; 674 for (i=1; i<n; i++) { 675 ai_i = ai[i]; 676 v = aa + ai_i; 677 vi = aj + ai_i; 678 nz = adiag[i] - ai_i; 679 sum = b[i]; 680 while (nz--) sum -= *v++ * x[*vi++]; 681 x[i] = sum; 682 } 683 684 /* backward solve the upper triangular */ 685 for (i=n-1; i>=0; i--){ 686 adiag_i = adiag[i]; 687 v = aa + adiag_i + 1; 688 vi = aj + adiag_i + 1; 689 nz = ai[i+1] - adiag_i - 1; 690 sum = x[i]; 691 while (nz--) sum -= *v++ * x[*vi++]; 692 x[i] = sum*aa[adiag_i]; 693 } 694 #endif 695 PetscLogFlops(2*a->nz - A->n); 696 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 697 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 698 PetscFunctionReturn(0); 699 } 700 701 #undef __FUNCT__ 702 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 703 int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 704 { 705 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 706 IS iscol = a->col,isrow = a->row; 707 int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 708 int nz,shift = a->indexshift,*rout,*cout; 709 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 710 711 PetscFunctionBegin; 712 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 713 714 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 715 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 716 tmp = a->solve_work; 717 718 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 719 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 720 721 /* forward solve the lower triangular */ 722 tmp[0] = b[*r++]; 723 for (i=1; i<n; i++) { 724 v = aa + ai[i] + shift; 725 vi = aj + ai[i] + shift; 726 nz = a->diag[i] - ai[i]; 727 sum = b[*r++]; 728 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 729 tmp[i] = sum; 730 } 731 732 /* backward solve the upper triangular */ 733 for (i=n-1; i>=0; i--){ 734 v = aa + a->diag[i] + (!shift); 735 vi = aj + a->diag[i] + (!shift); 736 nz = ai[i+1] - a->diag[i] - 1; 737 sum = tmp[i]; 738 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 739 tmp[i] = sum*aa[a->diag[i]+shift]; 740 x[*c--] += tmp[i]; 741 } 742 743 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 744 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 745 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 746 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 747 PetscLogFlops(2*a->nz); 748 749 PetscFunctionReturn(0); 750 } 751 /* -------------------------------------------------------------------*/ 752 #undef __FUNCT__ 753 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 754 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 755 { 756 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 757 IS iscol = a->col,isrow = a->row; 758 int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 759 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 760 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 761 762 PetscFunctionBegin; 763 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 764 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 765 tmp = a->solve_work; 766 767 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 768 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 769 770 /* copy the b into temp work space according to permutation */ 771 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 772 773 /* forward solve the U^T */ 774 for (i=0; i<n; i++) { 775 v = aa + diag[i] + shift; 776 vi = aj + diag[i] + (!shift); 777 nz = ai[i+1] - diag[i] - 1; 778 s1 = tmp[i]; 779 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 780 while (nz--) { 781 tmp[*vi++ + shift] -= (*v++)*s1; 782 } 783 tmp[i] = s1; 784 } 785 786 /* backward solve the L^T */ 787 for (i=n-1; i>=0; i--){ 788 v = aa + diag[i] - 1 + shift; 789 vi = aj + diag[i] - 1 + shift; 790 nz = diag[i] - ai[i]; 791 s1 = tmp[i]; 792 while (nz--) { 793 tmp[*vi-- + shift] -= (*v--)*s1; 794 } 795 } 796 797 /* copy tmp into x according to permutation */ 798 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 799 800 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 801 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 802 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 803 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 804 805 PetscLogFlops(2*a->nz-A->n); 806 PetscFunctionReturn(0); 807 } 808 809 #undef __FUNCT__ 810 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 811 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 812 { 813 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 814 IS iscol = a->col,isrow = a->row; 815 int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 816 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 817 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 818 819 PetscFunctionBegin; 820 if (zz != xx) VecCopy(zz,xx); 821 822 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 823 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 824 tmp = a->solve_work; 825 826 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 827 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 828 829 /* copy the b into temp work space according to permutation */ 830 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 831 832 /* forward solve the U^T */ 833 for (i=0; i<n; i++) { 834 v = aa + diag[i] + shift; 835 vi = aj + diag[i] + (!shift); 836 nz = ai[i+1] - diag[i] - 1; 837 tmp[i] *= *v++; 838 while (nz--) { 839 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 840 } 841 } 842 843 /* backward solve the L^T */ 844 for (i=n-1; i>=0; i--){ 845 v = aa + diag[i] - 1 + shift; 846 vi = aj + diag[i] - 1 + shift; 847 nz = diag[i] - ai[i]; 848 while (nz--) { 849 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 850 } 851 } 852 853 /* copy tmp into x according to permutation */ 854 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 855 856 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 857 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 858 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 859 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 860 861 PetscLogFlops(2*a->nz); 862 PetscFunctionReturn(0); 863 } 864 /* ----------------------------------------------------------------*/ 865 EXTERN int MatMissingDiagonal_SeqAIJ(Mat); 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 869 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 870 { 871 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 872 IS isicol; 873 int *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j; 874 int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 875 int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0; 876 int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 877 PetscTruth col_identity,row_identity; 878 PetscReal f; 879 880 PetscFunctionBegin; 881 f = info->fill; 882 levels = (int)info->levels; 883 diagonal_fill = (int)info->diagonal_fill; 884 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 885 886 /* special case that simply copies fill pattern */ 887 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 888 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 889 if (!levels && row_identity && col_identity) { 890 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 891 (*fact)->factor = FACTOR_LU; 892 b = (Mat_SeqAIJ*)(*fact)->data; 893 if (!b->diag) { 894 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 895 } 896 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 897 b->row = isrow; 898 b->col = iscol; 899 b->icol = isicol; 900 b->lu_damping = info->damping; 901 b->lu_zeropivot = info->zeropivot; 902 b->lu_shift = info->lu_shift; 903 b->lu_shift_fraction= info->lu_shift_fraction; 904 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 905 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 906 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 907 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 912 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 913 914 /* get new row pointers */ 915 ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 916 ainew[0] = -shift; 917 /* don't know how many column pointers are needed so estimate */ 918 jmax = (int)(f*(ai[n]+!shift)); 919 ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 920 /* ajfill is level of fill for each fill entry */ 921 ierr = PetscMalloc((jmax)*sizeof(int),&ajfill);CHKERRQ(ierr); 922 /* fill is a linked list of nonzeros in active row */ 923 ierr = PetscMalloc((n+1)*sizeof(int),&fill);CHKERRQ(ierr); 924 /* im is level for each filled value */ 925 ierr = PetscMalloc((n+1)*sizeof(int),&im);CHKERRQ(ierr); 926 /* dloc is location of diagonal in factor */ 927 ierr = PetscMalloc((n+1)*sizeof(int),&dloc);CHKERRQ(ierr); 928 dloc[0] = 0; 929 for (prow=0; prow<n; prow++) { 930 931 /* copy current row into linked list */ 932 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 933 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 934 xi = aj + ai[r[prow]] + shift; 935 fill[n] = n; 936 fill[prow] = -1; /* marker to indicate if diagonal exists */ 937 while (nz--) { 938 fm = n; 939 idx = ic[*xi++ + shift]; 940 do { 941 m = fm; 942 fm = fill[m]; 943 } while (fm < idx); 944 fill[m] = idx; 945 fill[idx] = fm; 946 im[idx] = 0; 947 } 948 949 /* make sure diagonal entry is included */ 950 if (diagonal_fill && fill[prow] == -1) { 951 fm = n; 952 while (fill[fm] < prow) fm = fill[fm]; 953 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 954 fill[fm] = prow; 955 im[prow] = 0; 956 nzf++; 957 dcount++; 958 } 959 960 nzi = 0; 961 row = fill[n]; 962 while (row < prow) { 963 incrlev = im[row] + 1; 964 nz = dloc[row]; 965 xi = ajnew + ainew[row] + shift + nz + 1; 966 flev = ajfill + ainew[row] + shift + nz + 1; 967 nnz = ainew[row+1] - ainew[row] - nz - 1; 968 fm = row; 969 while (nnz-- > 0) { 970 idx = *xi++ + shift; 971 if (*flev + incrlev > levels) { 972 flev++; 973 continue; 974 } 975 do { 976 m = fm; 977 fm = fill[m]; 978 } while (fm < idx); 979 if (fm != idx) { 980 im[idx] = *flev + incrlev; 981 fill[m] = idx; 982 fill[idx] = fm; 983 fm = idx; 984 nzf++; 985 } else { 986 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 987 } 988 flev++; 989 } 990 row = fill[row]; 991 nzi++; 992 } 993 /* copy new filled row into permanent storage */ 994 ainew[prow+1] = ainew[prow] + nzf; 995 if (ainew[prow+1] > jmax-shift) { 996 997 /* estimate how much additional space we will need */ 998 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 999 /* just double the memory each time */ 1000 /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1001 int maxadd = jmax; 1002 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1003 jmax += maxadd; 1004 1005 /* allocate a longer ajnew and ajfill */ 1006 ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 1007 ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1008 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1009 ajnew = xi; 1010 ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 1011 ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1012 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1013 ajfill = xi; 1014 realloc++; /* count how many times we realloc */ 1015 } 1016 xi = ajnew + ainew[prow] + shift; 1017 flev = ajfill + ainew[prow] + shift; 1018 dloc[prow] = nzi; 1019 fm = fill[n]; 1020 while (nzf--) { 1021 *xi++ = fm - shift; 1022 *flev++ = im[fm]; 1023 fm = fill[fm]; 1024 } 1025 /* make sure row has diagonal entry */ 1026 if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 1027 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrix\n\ 1028 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1029 } 1030 } 1031 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1032 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1033 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1034 ierr = PetscFree(fill);CHKERRQ(ierr); 1035 ierr = PetscFree(im);CHKERRQ(ierr); 1036 1037 { 1038 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1039 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1040 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1041 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1042 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1043 if (diagonal_fill) { 1044 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount); 1045 } 1046 } 1047 1048 /* put together the new matrix */ 1049 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1050 PetscLogObjectParent(*fact,isicol); 1051 b = (Mat_SeqAIJ*)(*fact)->data; 1052 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1053 b->singlemalloc = PETSC_FALSE; 1054 len = (ainew[n] + shift)*sizeof(PetscScalar); 1055 /* the next line frees the default space generated by the Create() */ 1056 ierr = PetscFree(b->a);CHKERRQ(ierr); 1057 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1058 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1059 b->j = ajnew; 1060 b->i = ainew; 1061 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1062 b->diag = dloc; 1063 b->ilen = 0; 1064 b->imax = 0; 1065 b->row = isrow; 1066 b->col = iscol; 1067 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1068 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1069 b->icol = isicol; 1070 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1071 /* In b structure: Free imax, ilen, old a, old j. 1072 Allocate dloc, solve_work, new a, new j */ 1073 PetscLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(PetscScalar))); 1074 b->maxnz = b->nz = ainew[n] + shift; 1075 b->lu_damping = info->damping; 1076 b->lu_shift = info->lu_shift; 1077 b->lu_shift_fraction = info->lu_shift_fraction; 1078 b->lu_zeropivot = info->zeropivot; 1079 (*fact)->factor = FACTOR_LU; 1080 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1081 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1082 1083 (*fact)->info.factor_mallocs = realloc; 1084 (*fact)->info.fill_ratio_given = f; 1085 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #include "src/mat/impls/sbaij/seq/sbaij.h" 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1092 int MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact) 1093 { 1094 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1095 int ierr; 1096 1097 PetscFunctionBegin; 1098 if (!a->sbaijMat){ 1099 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1100 } 1101 1102 ierr = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(a->sbaijMat,fact);CHKERRQ(ierr); 1103 ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 1104 a->sbaijMat = PETSC_NULL; 1105 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1111 int MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1112 { 1113 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1114 int ierr; 1115 PetscTruth perm_identity; 1116 1117 PetscFunctionBegin; 1118 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1119 if (!perm_identity){ 1120 SETERRQ(1,"Non-identity permutation is not supported yet"); 1121 } 1122 if (!a->sbaijMat){ 1123 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1124 } 1125 1126 ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1127 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1128 1129 PetscFunctionReturn(0); 1130 } 1131 1132 #undef __FUNCT__ 1133 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1134 int MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1135 { 1136 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1137 int ierr; 1138 PetscTruth perm_identity; 1139 1140 PetscFunctionBegin; 1141 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1142 if (!perm_identity){ 1143 SETERRQ(1,"Non-identity permutation is not supported yet"); 1144 } 1145 if (!a->sbaijMat){ 1146 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1147 } 1148 1149 ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1150 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1151 1152 PetscFunctionReturn(0); 1153 } 1154