1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 8 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 9 { 10 PetscFunctionBegin; 11 12 SETERRQ(PETSC_ERR_SUP,"Code not written"); 13 #if !defined(PETSC_USE_DEBUG) 14 PetscFunctionReturn(0); 15 #endif 16 } 17 18 19 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 20 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 21 22 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 23 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 24 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 28 /* ------------------------------------------------------------ 29 30 This interface was contribed by Tony Caola 31 32 This routine is an interface to the pivoting drop-tolerance 33 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 34 SPARSEKIT2. 35 36 The SPARSEKIT2 routines used here are covered by the GNU 37 copyright; see the file gnu in this directory. 38 39 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 40 help in getting this routine ironed out. 41 42 The major drawback to this routine is that if info->fill is 43 not large enough it fails rather than allocating more space; 44 this can be fixed by hacking/improving the f2c version of 45 Yousef Saad's code. 46 47 ------------------------------------------------------------ 48 */ 49 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact) 50 { 51 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 52 PetscFunctionBegin; 53 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 54 You can obtain the drop tolerance routines by installing PETSc from\n\ 55 www.mcs.anl.gov/petsc\n"); 56 #else 57 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 58 IS iscolf,isicol,isirow; 59 PetscTruth reorder; 60 PetscErrorCode ierr,sierr; 61 PetscInt *c,*r,*ic,i,n = A->m; 62 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 63 PetscInt *ordcol,*iwk,*iperm,*jw; 64 PetscInt jmax,lfill,job,*o_i,*o_j; 65 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 66 PetscReal af; 67 68 PetscFunctionBegin; 69 70 if (info->dt == PETSC_DEFAULT) info->dt = .005; 71 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 72 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 73 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 74 lfill = (PetscInt)(info->dtcount/2.0); 75 jmax = (PetscInt)(info->fill*a->nz); 76 77 78 /* ------------------------------------------------------------ 79 If reorder=.TRUE., then the original matrix has to be 80 reordered to reflect the user selected ordering scheme, and 81 then de-reordered so it is in it's original format. 82 Because Saad's dperm() is NOT in place, we have to copy 83 the original matrix and allocate more storage. . . 84 ------------------------------------------------------------ 85 */ 86 87 /* set reorder to true if either isrow or iscol is not identity */ 88 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 89 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 90 reorder = PetscNot(reorder); 91 92 93 /* storage for ilu factor */ 94 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 95 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 97 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 for (i=old_i[0];i<old_i[n];i++) { 104 old_j[i]++; 105 } 106 for(i=0;i<n+1;i++) { 107 old_i[i]++; 108 }; 109 110 111 if (reorder) { 112 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 113 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 114 for(i=0;i<n;i++) { 115 r[i] = r[i]+1; 116 c[i] = c[i]+1; 117 } 118 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 119 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 121 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 122 for (i=0;i<n;i++) { 123 r[i] = r[i]-1; 124 c[i] = c[i]-1; 125 } 126 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 127 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 128 o_a = old_a2; 129 o_j = old_j2; 130 o_i = old_i2; 131 } else { 132 o_a = old_a; 133 o_j = old_j; 134 o_i = old_i; 135 } 136 137 /* ------------------------------------------------------------ 138 Call Saad's ilutp() routine to generate the factorization 139 ------------------------------------------------------------ 140 */ 141 142 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 144 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 145 146 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 147 if (sierr) { 148 switch (sierr) { 149 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 150 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 152 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 153 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 154 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 155 } 156 } 157 158 ierr = PetscFree(w);CHKERRQ(ierr); 159 ierr = PetscFree(jw);CHKERRQ(ierr); 160 161 /* ------------------------------------------------------------ 162 Saad's routine gives the result in Modified Sparse Row (msr) 163 Convert to Compressed Sparse Row format (csr) 164 ------------------------------------------------------------ 165 */ 166 167 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 168 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 169 170 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 171 172 ierr = PetscFree(iwk);CHKERRQ(ierr); 173 ierr = PetscFree(wk);CHKERRQ(ierr); 174 175 if (reorder) { 176 ierr = PetscFree(old_a2);CHKERRQ(ierr); 177 ierr = PetscFree(old_j2);CHKERRQ(ierr); 178 ierr = PetscFree(old_i2);CHKERRQ(ierr); 179 } else { 180 /* fix permutation of old_j that the factorization introduced */ 181 for (i=old_i[0]; i<old_i[n]; i++) { 182 old_j[i-1] = iperm[old_j[i-1]-1]; 183 } 184 } 185 186 /* get rid of the shift to indices starting at 1 */ 187 for (i=0; i<n+1; i++) { 188 old_i[i]--; 189 } 190 for (i=old_i[0];i<old_i[n];i++) { 191 old_j[i]--; 192 } 193 194 /* Make the factored matrix 0-based */ 195 for (i=0; i<n+1; i++) { 196 new_i[i]--; 197 } 198 for (i=new_i[0];i<new_i[n];i++) { 199 new_j[i]--; 200 } 201 202 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 203 /*-- permute the right-hand-side and solution vectors --*/ 204 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 205 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 206 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 207 for(i=0; i<n; i++) { 208 ordcol[i] = ic[iperm[i]-1]; 209 }; 210 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 211 ierr = ISDestroy(isicol);CHKERRQ(ierr); 212 213 ierr = PetscFree(iperm);CHKERRQ(ierr); 214 215 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 216 ierr = PetscFree(ordcol);CHKERRQ(ierr); 217 218 /*----- put together the new matrix -----*/ 219 220 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 221 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 222 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 223 (*fact)->factor = FACTOR_LU; 224 (*fact)->assembled = PETSC_TRUE; 225 226 b = (Mat_SeqAIJ*)(*fact)->data; 227 ierr = PetscFree(b->imax);CHKERRQ(ierr); 228 b->sorted = PETSC_FALSE; 229 b->singlemalloc = PETSC_FALSE; 230 /* the next line frees the default space generated by the MatCreate() */ 231 ierr = PetscFree(b->a);CHKERRQ(ierr); 232 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 247 248 /* check out for identical nodes. If found, use inode functions */ 249 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 250 251 af = ((double)b->nz)/((double)a->nz) + .001; 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 254 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 255 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 /* 262 Factorization code for AIJ format. 263 */ 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 266 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 267 { 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 269 IS isicol; 270 PetscErrorCode ierr; 271 PetscInt *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j; 272 PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz; 273 PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im; 274 PetscReal f; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 284 ainew[0] = 0; 285 /* don't know how many column pointers are needed so estimate */ 286 f = info->fill; 287 jmax = (PetscInt)(f*ai[n]+1); 288 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 289 /* fill is a linked list of nonzeros in active row */ 290 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 291 im = fill + n + 1; 292 /* idnew is location of diagonal in factor */ 293 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr); 294 idnew[0] = 0; 295 296 for (i=0; i<n; i++) { 297 /* first copy previous fill into linked list */ 298 nnz = nz = ai[r[i]+1] - ai[r[i]]; 299 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 300 ajtmp = aj + ai[r[i]]; 301 fill[n] = n; 302 while (nz--) { 303 fm = n; 304 idx = ic[*ajtmp++]; 305 do { 306 m = fm; 307 fm = fill[m]; 308 } while (fm < idx); 309 fill[m] = idx; 310 fill[idx] = fm; 311 } 312 row = fill[n]; 313 while (row < i) { 314 ajtmp = ajnew + idnew[row] + 1; 315 nzbd = 1 + idnew[row] - ainew[row]; 316 nz = im[row] - nzbd; 317 fm = row; 318 while (nz-- > 0) { 319 idx = *ajtmp++ ; 320 nzbd++; 321 if (idx == i) im[row] = nzbd; 322 do { 323 m = fm; 324 fm = fill[m]; 325 } while (fm < idx); 326 if (fm != idx) { 327 fill[m] = idx; 328 fill[idx] = fm; 329 fm = idx; 330 nnz++; 331 } 332 } 333 row = fill[row]; 334 } 335 /* copy new filled row into permanent storage */ 336 ainew[i+1] = ainew[i] + nnz; 337 if (ainew[i+1] > jmax) { 338 339 /* estimate how much additional space we will need */ 340 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 341 /* just double the memory each time */ 342 PetscInt maxadd = jmax; 343 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 344 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 345 jmax += maxadd; 346 347 /* allocate a longer ajnew */ 348 ierr = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 349 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));CHKERRQ(ierr); 350 ierr = PetscFree(ajnew);CHKERRQ(ierr); 351 ajnew = ajtmp; 352 reallocs++; /* count how many times we realloc */ 353 } 354 ajtmp = ajnew + ainew[i]; 355 fm = fill[n]; 356 nzi = 0; 357 im[i] = nnz; 358 while (nnz--) { 359 if (fm < i) nzi++; 360 *ajtmp++ = fm ; 361 fm = fill[fm]; 362 } 363 idnew[i] = ainew[i] + nzi; 364 } 365 if (ai[n] != 0) { 366 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 367 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 368 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 369 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 370 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 371 } else { 372 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 373 } 374 375 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 376 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 377 378 ierr = PetscFree(fill);CHKERRQ(ierr); 379 380 /* put together the new matrix */ 381 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 382 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 383 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 384 PetscLogObjectParent(*B,isicol); 385 b = (Mat_SeqAIJ*)(*B)->data; 386 ierr = PetscFree(b->imax);CHKERRQ(ierr); 387 b->singlemalloc = PETSC_FALSE; 388 /* the next line frees the default space generated by the Create() */ 389 ierr = PetscFree(b->a);CHKERRQ(ierr); 390 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 391 ierr = PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 392 b->j = ajnew; 393 b->i = ainew; 394 b->diag = idnew; 395 b->ilen = 0; 396 b->imax = 0; 397 b->row = isrow; 398 b->col = iscol; 399 b->lu_damping = info->damping; 400 b->lu_zeropivot = info->zeropivot; 401 b->lu_shift = info->shift; 402 b->lu_shift_fraction = info->shift_fraction; 403 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 404 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 405 b->icol = isicol; 406 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 407 /* In b structure: Free imax, ilen, old a, old j. 408 Allocate idnew, solve_work, new a, new j */ 409 PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar))); 410 b->maxnz = b->nz = ainew[n] ; 411 412 (*B)->factor = FACTOR_LU; 413 (*B)->info.factor_mallocs = reallocs; 414 (*B)->info.fill_ratio_given = f; 415 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 416 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 417 418 if (ai[n] != 0) { 419 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 420 } else { 421 (*B)->info.fill_ratio_needed = 0.0; 422 } 423 PetscFunctionReturn(0); 424 } 425 /* ----------------------------------------------------------- */ 426 EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth); 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 430 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 431 { 432 Mat C = *B; 433 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 434 IS isrow = b->row,isicol = b->icol; 435 PetscErrorCode ierr; 436 PetscInt *r,*ic,i,j,n = A->m,*ai = b->i,*aj = b->j; 437 PetscInt *ajtmpold,*ajtmp,nz,row,*ics; 438 PetscInt *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0; 439 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 440 PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d; 441 PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.; 442 PetscTruth damp,lushift; 443 444 PetscFunctionBegin; 445 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 446 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 447 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 448 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 449 rtmps = rtmp; ics = ic; 450 451 if (!a->diag) { 452 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 453 } 454 455 if (b->lu_shift) { /* set max shift */ 456 PetscInt *aai = a->i,*ddiag = a->diag; 457 shift_top = 0; 458 for (i=0; i<n; i++) { 459 d = PetscAbsScalar((a->a)[ddiag[i]]); 460 /* calculate amt of shift needed for this row */ 461 if (d<=0) { 462 row_shift = 0; 463 } else { 464 row_shift = -2*d; 465 } 466 v = a->a+aai[i]; 467 for (j=0; j<aai[i+1]-aai[i]; j++) 468 row_shift += PetscAbsScalar(v[j]); 469 if (row_shift>shift_top) shift_top = row_shift; 470 } 471 } 472 473 shift_fraction = 0; shift_amount = 0; 474 do { 475 damp = PETSC_FALSE; 476 lushift = PETSC_FALSE; 477 for (i=0; i<n; i++) { 478 nz = ai[i+1] - ai[i]; 479 ajtmp = aj + ai[i]; 480 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 481 482 /* load in initial (unfactored row) */ 483 nz = a->i[r[i]+1] - a->i[r[i]]; 484 ajtmpold = a->j + a->i[r[i]]; 485 v = a->a + a->i[r[i]]; 486 for (j=0; j<nz; j++) { 487 rtmp[ics[ajtmpold[j]]] = v[j]; 488 } 489 rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */ 490 491 row = *ajtmp++ ; 492 while (row < i) { 493 pc = rtmp + row; 494 if (*pc != 0.0) { 495 pv = b->a + diag_offset[row]; 496 pj = b->j + diag_offset[row] + 1; 497 multiplier = *pc / *pv++; 498 *pc = multiplier; 499 nz = ai[row+1] - diag_offset[row] - 1; 500 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 501 PetscLogFlops(2*nz); 502 } 503 row = *ajtmp++; 504 } 505 /* finished row so stick it into b->a */ 506 pv = b->a + ai[i] ; 507 pj = b->j + ai[i] ; 508 nz = ai[i+1] - ai[i]; 509 diag = diag_offset[i] - ai[i]; 510 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 511 rs = 0.0; 512 for (j=0; j<nz; j++) { 513 pv[j] = rtmps[pj[j]]; 514 if (j != diag) rs += PetscAbsScalar(pv[j]); 515 } 516 #define MAX_NSHIFT 5 517 if (PetscRealPart(pv[diag]) <= zeropivot*rs && b->lu_shift) { 518 if (nshift>MAX_NSHIFT) { 519 SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner"); 520 } else if (nshift==MAX_NSHIFT) { 521 shift_fraction = shift_hi; 522 lushift = PETSC_FALSE; 523 } else { 524 shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.; 525 lushift = PETSC_TRUE; 526 } 527 shift_amount = shift_fraction * shift_top; 528 nshift++; 529 break; 530 } 531 if (PetscAbsScalar(pv[diag]) <= zeropivot*rs) { 532 if (damping) { 533 if (ndamp) damping *= 2.0; 534 damp = PETSC_TRUE; 535 ndamp++; 536 break; 537 } else { 538 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 539 } 540 } 541 } 542 if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) { 543 /* 544 * if no shift in this attempt & shifting & started shifting & can refine, 545 * then try lower shift 546 */ 547 shift_hi = shift_fraction; 548 shift_fraction = (shift_hi+shift_lo)/2.; 549 shift_amount = shift_fraction * shift_top; 550 lushift = PETSC_TRUE; 551 nshift++; 552 } 553 } while (damp || lushift); 554 555 /* invert diagonal entries for simplier triangular solves */ 556 for (i=0; i<n; i++) { 557 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 558 } 559 560 ierr = PetscFree(rtmp);CHKERRQ(ierr); 561 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 562 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 563 C->factor = FACTOR_LU; 564 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 565 C->assembled = PETSC_TRUE; 566 PetscLogFlops(C->n); 567 if (ndamp) { 568 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %D damping value %g\n",ndamp,damping); 569 } 570 if (nshift) { 571 b->lu_shift_fraction = shift_fraction; 572 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift); 573 } 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 579 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 580 { 581 PetscFunctionBegin; 582 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 583 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 584 PetscFunctionReturn(0); 585 } 586 587 588 /* ----------------------------------------------------------- */ 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatLUFactor_SeqAIJ" 591 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 592 { 593 PetscErrorCode ierr; 594 Mat C; 595 596 PetscFunctionBegin; 597 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 598 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 599 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 600 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 601 PetscFunctionReturn(0); 602 } 603 /* ----------------------------------------------------------- */ 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatSolve_SeqAIJ" 606 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 607 { 608 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 609 IS iscol = a->col,isrow = a->row; 610 PetscErrorCode ierr; 611 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 612 PetscInt nz,*rout,*cout; 613 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 614 615 PetscFunctionBegin; 616 if (!n) PetscFunctionReturn(0); 617 618 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 619 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 620 tmp = a->solve_work; 621 622 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 623 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 624 625 /* forward solve the lower triangular */ 626 tmp[0] = b[*r++]; 627 tmps = tmp; 628 for (i=1; i<n; i++) { 629 v = aa + ai[i] ; 630 vi = aj + ai[i] ; 631 nz = a->diag[i] - ai[i]; 632 sum = b[*r++]; 633 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 634 tmp[i] = sum; 635 } 636 637 /* backward solve the upper triangular */ 638 for (i=n-1; i>=0; i--){ 639 v = aa + a->diag[i] + 1; 640 vi = aj + a->diag[i] + 1; 641 nz = ai[i+1] - a->diag[i] - 1; 642 sum = tmp[i]; 643 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 644 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 645 } 646 647 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 648 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 649 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 650 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 651 PetscLogFlops(2*a->nz - A->n); 652 PetscFunctionReturn(0); 653 } 654 655 /* ----------------------------------------------------------- */ 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 658 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 659 { 660 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 661 PetscErrorCode ierr; 662 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 663 PetscScalar *x,*b,*aa = a->a; 664 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 665 PetscInt adiag_i,i,*vi,nz,ai_i; 666 PetscScalar *v,sum; 667 #endif 668 669 PetscFunctionBegin; 670 if (!n) PetscFunctionReturn(0); 671 672 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 673 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 674 675 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 676 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 677 #else 678 /* forward solve the lower triangular */ 679 x[0] = b[0]; 680 for (i=1; i<n; i++) { 681 ai_i = ai[i]; 682 v = aa + ai_i; 683 vi = aj + ai_i; 684 nz = adiag[i] - ai_i; 685 sum = b[i]; 686 while (nz--) sum -= *v++ * x[*vi++]; 687 x[i] = sum; 688 } 689 690 /* backward solve the upper triangular */ 691 for (i=n-1; i>=0; i--){ 692 adiag_i = adiag[i]; 693 v = aa + adiag_i + 1; 694 vi = aj + adiag_i + 1; 695 nz = ai[i+1] - adiag_i - 1; 696 sum = x[i]; 697 while (nz--) sum -= *v++ * x[*vi++]; 698 x[i] = sum*aa[adiag_i]; 699 } 700 #endif 701 PetscLogFlops(2*a->nz - A->n); 702 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 703 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 709 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 710 { 711 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 712 IS iscol = a->col,isrow = a->row; 713 PetscErrorCode ierr; 714 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 715 PetscInt nz,*rout,*cout; 716 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 717 718 PetscFunctionBegin; 719 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 720 721 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 722 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 723 tmp = a->solve_work; 724 725 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 726 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 727 728 /* forward solve the lower triangular */ 729 tmp[0] = b[*r++]; 730 for (i=1; i<n; i++) { 731 v = aa + ai[i] ; 732 vi = aj + ai[i] ; 733 nz = a->diag[i] - ai[i]; 734 sum = b[*r++]; 735 while (nz--) sum -= *v++ * tmp[*vi++ ]; 736 tmp[i] = sum; 737 } 738 739 /* backward solve the upper triangular */ 740 for (i=n-1; i>=0; i--){ 741 v = aa + a->diag[i] + 1; 742 vi = aj + a->diag[i] + 1; 743 nz = ai[i+1] - a->diag[i] - 1; 744 sum = tmp[i]; 745 while (nz--) sum -= *v++ * tmp[*vi++ ]; 746 tmp[i] = sum*aa[a->diag[i]]; 747 x[*c--] += tmp[i]; 748 } 749 750 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 751 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 752 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 753 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 754 PetscLogFlops(2*a->nz); 755 756 PetscFunctionReturn(0); 757 } 758 /* -------------------------------------------------------------------*/ 759 #undef __FUNCT__ 760 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 761 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 762 { 763 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 764 IS iscol = a->col,isrow = a->row; 765 PetscErrorCode ierr; 766 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 767 PetscInt nz,*rout,*cout,*diag = a->diag; 768 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 769 770 PetscFunctionBegin; 771 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 772 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 773 tmp = a->solve_work; 774 775 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 776 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 777 778 /* copy the b into temp work space according to permutation */ 779 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 780 781 /* forward solve the U^T */ 782 for (i=0; i<n; i++) { 783 v = aa + diag[i] ; 784 vi = aj + diag[i] + 1; 785 nz = ai[i+1] - diag[i] - 1; 786 s1 = tmp[i]; 787 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 788 while (nz--) { 789 tmp[*vi++ ] -= (*v++)*s1; 790 } 791 tmp[i] = s1; 792 } 793 794 /* backward solve the L^T */ 795 for (i=n-1; i>=0; i--){ 796 v = aa + diag[i] - 1 ; 797 vi = aj + diag[i] - 1 ; 798 nz = diag[i] - ai[i]; 799 s1 = tmp[i]; 800 while (nz--) { 801 tmp[*vi-- ] -= (*v--)*s1; 802 } 803 } 804 805 /* copy tmp into x according to permutation */ 806 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 807 808 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 809 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 810 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 811 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 812 813 PetscLogFlops(2*a->nz-A->n); 814 PetscFunctionReturn(0); 815 } 816 817 #undef __FUNCT__ 818 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 819 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 820 { 821 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 822 IS iscol = a->col,isrow = a->row; 823 PetscErrorCode ierr; 824 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 825 PetscInt nz,*rout,*cout,*diag = a->diag; 826 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 827 828 PetscFunctionBegin; 829 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 830 831 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 832 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 833 tmp = a->solve_work; 834 835 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 836 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 837 838 /* copy the b into temp work space according to permutation */ 839 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 840 841 /* forward solve the U^T */ 842 for (i=0; i<n; i++) { 843 v = aa + diag[i] ; 844 vi = aj + diag[i] + 1; 845 nz = ai[i+1] - diag[i] - 1; 846 tmp[i] *= *v++; 847 while (nz--) { 848 tmp[*vi++ ] -= (*v++)*tmp[i]; 849 } 850 } 851 852 /* backward solve the L^T */ 853 for (i=n-1; i>=0; i--){ 854 v = aa + diag[i] - 1 ; 855 vi = aj + diag[i] - 1 ; 856 nz = diag[i] - ai[i]; 857 while (nz--) { 858 tmp[*vi-- ] -= (*v--)*tmp[i]; 859 } 860 } 861 862 /* copy tmp into x according to permutation */ 863 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 864 865 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 866 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 867 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 868 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 869 870 PetscLogFlops(2*a->nz); 871 PetscFunctionReturn(0); 872 } 873 /* ----------------------------------------------------------------*/ 874 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 875 876 #undef __FUNCT__ 877 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 878 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 879 { 880 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 881 IS isicol; 882 PetscErrorCode ierr; 883 PetscInt *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j; 884 PetscInt *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 885 PetscInt *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0; 886 PetscInt incrlev,nnz,i,levels,diagonal_fill; 887 PetscTruth col_identity,row_identity; 888 PetscReal f; 889 890 PetscFunctionBegin; 891 f = info->fill; 892 levels = (PetscInt)info->levels; 893 diagonal_fill = (PetscInt)info->diagonal_fill; 894 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 895 896 /* special case that simply copies fill pattern */ 897 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 898 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 899 if (!levels && row_identity && col_identity) { 900 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 901 (*fact)->factor = FACTOR_LU; 902 b = (Mat_SeqAIJ*)(*fact)->data; 903 if (!b->diag) { 904 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 905 } 906 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 907 b->row = isrow; 908 b->col = iscol; 909 b->icol = isicol; 910 b->lu_damping = info->damping; 911 b->lu_zeropivot = info->zeropivot; 912 b->lu_shift = info->shift; 913 b->lu_shift_fraction= info->shift_fraction; 914 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 915 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 916 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 917 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 918 PetscFunctionReturn(0); 919 } 920 921 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 922 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 923 924 /* get new row pointers */ 925 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 926 ainew[0] = 0; 927 /* don't know how many column pointers are needed so estimate */ 928 jmax = (PetscInt)(f*(ai[n]+1)); 929 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 930 /* ajfill is level of fill for each fill entry */ 931 ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);CHKERRQ(ierr); 932 /* fill is a linked list of nonzeros in active row */ 933 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 934 /* im is level for each filled value */ 935 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 936 /* dloc is location of diagonal in factor */ 937 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&dloc);CHKERRQ(ierr); 938 dloc[0] = 0; 939 for (prow=0; prow<n; prow++) { 940 941 /* copy current row into linked list */ 942 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 943 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 944 xi = aj + ai[r[prow]] ; 945 fill[n] = n; 946 fill[prow] = -1; /* marker to indicate if diagonal exists */ 947 while (nz--) { 948 fm = n; 949 idx = ic[*xi++ ]; 950 do { 951 m = fm; 952 fm = fill[m]; 953 } while (fm < idx); 954 fill[m] = idx; 955 fill[idx] = fm; 956 im[idx] = 0; 957 } 958 959 /* make sure diagonal entry is included */ 960 if (diagonal_fill && fill[prow] == -1) { 961 fm = n; 962 while (fill[fm] < prow) fm = fill[fm]; 963 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 964 fill[fm] = prow; 965 im[prow] = 0; 966 nzf++; 967 dcount++; 968 } 969 970 nzi = 0; 971 row = fill[n]; 972 while (row < prow) { 973 incrlev = im[row] + 1; 974 nz = dloc[row]; 975 xi = ajnew + ainew[row] + nz + 1; 976 flev = ajfill + ainew[row] + nz + 1; 977 nnz = ainew[row+1] - ainew[row] - nz - 1; 978 fm = row; 979 while (nnz-- > 0) { 980 idx = *xi++ ; 981 if (*flev + incrlev > levels) { 982 flev++; 983 continue; 984 } 985 do { 986 m = fm; 987 fm = fill[m]; 988 } while (fm < idx); 989 if (fm != idx) { 990 im[idx] = *flev + incrlev; 991 fill[m] = idx; 992 fill[idx] = fm; 993 fm = idx; 994 nzf++; 995 } else { 996 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 997 } 998 flev++; 999 } 1000 row = fill[row]; 1001 nzi++; 1002 } 1003 /* copy new filled row into permanent storage */ 1004 ainew[prow+1] = ainew[prow] + nzf; 1005 if (ainew[prow+1] > jmax) { 1006 1007 /* estimate how much additional space we will need */ 1008 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1009 /* just double the memory each time */ 1010 /* maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1011 PetscInt maxadd = jmax; 1012 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1013 jmax += maxadd; 1014 1015 /* allocate a longer ajnew and ajfill */ 1016 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1017 ierr = PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1018 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1019 ajnew = xi; 1020 ierr = PetscMalloc(jmax*sizeof(PetscInt),&xi);CHKERRQ(ierr); 1021 ierr = PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));CHKERRQ(ierr); 1022 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1023 ajfill = xi; 1024 reallocs++; /* count how many times we realloc */ 1025 } 1026 xi = ajnew + ainew[prow] ; 1027 flev = ajfill + ainew[prow] ; 1028 dloc[prow] = nzi; 1029 fm = fill[n]; 1030 while (nzf--) { 1031 *xi++ = fm ; 1032 *flev++ = im[fm]; 1033 fm = fill[fm]; 1034 } 1035 /* make sure row has diagonal entry */ 1036 if (ajnew[ainew[prow]+dloc[prow]] != prow) { 1037 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1038 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1039 } 1040 } 1041 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1042 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1043 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1044 ierr = PetscFree(fill);CHKERRQ(ierr); 1045 ierr = PetscFree(im);CHKERRQ(ierr); 1046 1047 { 1048 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1049 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1050 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1051 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1052 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1053 if (diagonal_fill) { 1054 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1055 } 1056 } 1057 1058 /* put together the new matrix */ 1059 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1060 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1061 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1062 PetscLogObjectParent(*fact,isicol); 1063 b = (Mat_SeqAIJ*)(*fact)->data; 1064 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1065 b->singlemalloc = PETSC_FALSE; 1066 len = (ainew[n] )*sizeof(PetscScalar); 1067 /* the next line frees the default space generated by the Create() */ 1068 ierr = PetscFree(b->a);CHKERRQ(ierr); 1069 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1070 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1071 b->j = ajnew; 1072 b->i = ainew; 1073 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1074 b->diag = dloc; 1075 b->ilen = 0; 1076 b->imax = 0; 1077 b->row = isrow; 1078 b->col = iscol; 1079 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1080 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1081 b->icol = isicol; 1082 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1083 /* In b structure: Free imax, ilen, old a, old j. 1084 Allocate dloc, solve_work, new a, new j */ 1085 PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar))); 1086 b->maxnz = b->nz = ainew[n] ; 1087 b->lu_damping = info->damping; 1088 b->lu_shift = info->shift; 1089 b->lu_shift_fraction = info->shift_fraction; 1090 b->lu_zeropivot = info->zeropivot; 1091 (*fact)->factor = FACTOR_LU; 1092 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1093 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1094 1095 (*fact)->info.factor_mallocs = reallocs; 1096 (*fact)->info.fill_ratio_given = f; 1097 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1103 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact) 1104 { 1105 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1106 PetscErrorCode ierr,(*f)(Mat,Mat*); 1107 1108 PetscFunctionBegin; 1109 if (!a->sbaijMat){ 1110 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1111 } 1112 ierr = PetscObjectQueryFunction((PetscObject) *fact,"MatCholeskyFactorNumeric",(void (**)(void))&f);CHKERRQ(ierr); 1113 ierr = (*f)(a->sbaijMat,fact);CHKERRQ(ierr); 1114 ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 1115 a->sbaijMat = PETSC_NULL; 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1121 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1122 { 1123 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1124 PetscErrorCode ierr; 1125 PetscTruth perm_identity; 1126 1127 PetscFunctionBegin; 1128 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1129 if (!perm_identity){ 1130 SETERRQ(PETSC_ERR_SUP,"Non-identity permutation is not supported yet"); 1131 } 1132 if (!a->sbaijMat){ 1133 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1134 } 1135 1136 ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 1137 ierr = PetscObjectComposeFunction((PetscObject) *fact,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);CHKERRQ(ierr); 1138 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1139 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #include "petscbt.h" 1144 #include "src/mat/utils/freespace.h" 1145 #include "src/mat/impls/sbaij/seq/sbaij.h" 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1148 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1149 { 1150 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1151 Mat_SeqSBAIJ *b; 1152 PetscErrorCode ierr; 1153 PetscTruth perm_identity; 1154 PetscReal fill = info->fill; 1155 PetscInt *rip,i,am=A->m,*ai,*aj,reallocs=0,prow; 1156 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1157 PetscInt nlnk,*lnk,ncols,*cols,*uj,**uj_ptr; 1158 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1159 PetscBT lnkbt; 1160 1161 PetscFunctionBegin; 1162 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1163 if (!perm_identity){ 1164 SETERRQ(PETSC_ERR_SUP,"Non-identity permutation is not supported yet"); 1165 } 1166 1167 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1168 ai = a->i; aj = a->j; 1169 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1170 1171 /* initialization */ 1172 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1173 ui[0] = 0; 1174 1175 /* jl: linked list for storing indices of the pivot rows 1176 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1177 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 1178 il = jl + am; 1179 uj_ptr = (PetscInt**)(il + am); 1180 for (i=0; i<am; i++){ 1181 jl[i] = am; il[i] = 0; 1182 } 1183 1184 /* create and initialize a linked list for storing column indices of the active row k */ 1185 nlnk = am + 1; 1186 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1187 1188 /* initial FreeSpace size is fill*(ai[am]+1) */ 1189 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1190 current_space = free_space; 1191 1192 for (k=0; k<am; k++){ /* for each active row k */ 1193 /* initialize lnk by the column indices of row rip[k] of A */ 1194 nzk = 0; 1195 ncols = ai[rip[k]+1] - a->diag[rip[k]]; 1196 cols = aj + a->diag[rip[k]]; 1197 ierr = PetscLLAdd(ncols,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1198 nzk += nlnk; 1199 1200 /* update lnk by computing fill-in for each pivot row to be merged in */ 1201 prow = jl[k]; /* 1st pivot row */ 1202 1203 while (prow < k){ 1204 nextprow = jl[prow]; 1205 1206 /* merge prow into k-th row */ 1207 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1208 jmax = ui[prow+1]; 1209 ncols = jmax-jmin; 1210 cols = uj_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1211 ierr = PetscLLAdd(ncols,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1212 nzk += nlnk; 1213 1214 /* update il and jl for prow */ 1215 if (jmin < jmax){ 1216 il[prow] = jmin; 1217 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1218 } 1219 prow = nextprow; 1220 } 1221 1222 /* if free space is not available, make more free space */ 1223 if (current_space->local_remaining<nzk) { 1224 i = am - k + 1; /* num of unfactored rows */ 1225 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1226 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1227 reallocs++; 1228 } 1229 1230 /* copy data into free space, then initialize lnk */ 1231 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1232 1233 /* add the k-th row into il and jl */ 1234 if (nzk-1 > 0){ 1235 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1236 jl[k] = jl[i]; jl[i] = k; 1237 il[k] = ui[k] + 1; 1238 } 1239 uj_ptr[k] = current_space->array; 1240 current_space->array += nzk; 1241 current_space->local_used += nzk; 1242 current_space->local_remaining -= nzk; 1243 1244 ui[k+1] = ui[k] + nzk; 1245 } 1246 1247 if (ai[am] != 0) { 1248 PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1249 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1250 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1251 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1252 } else { 1253 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1254 } 1255 1256 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1257 ierr = PetscFree(jl);CHKERRQ(ierr); 1258 1259 /* destroy list of free space and other temporary array(s) */ 1260 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1261 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1262 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1263 1264 /* put together the new matrix in MATSEQSBAIJ format */ 1265 ierr = MatCreate(A->comm,am,am,am,am,fact);CHKERRQ(ierr); 1266 ierr = MatSetType(*fact,MATSEQSBAIJ);CHKERRQ(ierr); 1267 ierr = MatSeqSBAIJSetPreallocation(*fact,1,0,PETSC_NULL);CHKERRQ(ierr); 1268 1269 b = (Mat_SeqSBAIJ*)(*fact)->data; 1270 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1271 b->singlemalloc = PETSC_FALSE; 1272 /* the next line frees the default space generated by the Create() */ 1273 ierr = PetscFree(b->a);CHKERRQ(ierr); 1274 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1275 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1276 b->j = uj; 1277 b->i = ui; 1278 b->diag = 0; 1279 b->ilen = 0; 1280 b->imax = 0; 1281 b->row = perm; 1282 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1283 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1284 b->icol = perm; 1285 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1286 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1287 /* In b structure: Free imax, ilen, old a, old j. 1288 Allocate idnew, solve_work, new a, new j */ 1289 PetscLogObjectMemory(*fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))); 1290 b->maxnz = b->nz = ui[am]; 1291 1292 (*fact)->factor = FACTOR_CHOLESKY; 1293 (*fact)->info.factor_mallocs = reallocs; 1294 (*fact)->info.fill_ratio_given = fill; 1295 if (ai[am] != 0) { 1296 (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1297 } else { 1298 (*fact)->info.fill_ratio_needed = 0.0; 1299 } 1300 1301 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1302 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1303 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1304 1305 /* -------- end of new ---------------------------*/ 1306 ierr = PetscObjectComposeFunction((PetscObject) *fact,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);CHKERRQ(ierr); 1307 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1308 1309 PetscFunctionReturn(0); 1310 } 1311