1 /*$Id: aijfact.c,v 1.129 1999/12/16 03:13:25 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 7 #undef __FUNC__ 8 #define __FUNC__ "MatOrdering_Flow_SeqAIJ" 9 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 10 { 11 PetscFunctionBegin; 12 13 SETERRQ(PETSC_ERR_SUP,0,"Code not written"); 14 #if !defined(PETSC_USE_DEBUG) 15 PetscFunctionReturn(0); 16 #endif 17 } 18 19 20 extern int MatMarkDiagonal_SeqAIJ(Mat); 21 extern int Mat_AIJ_CheckInode(Mat); 22 23 #if !defined(PETSC_USE_COMPLEX) && defined(PETSC_HAVE_SAADILUDT) 24 25 #if defined(PETSC_HAVE_FORTRAN_CAPS) 26 #define dperm_ DPERM 27 #define ilutp_ ILUTP 28 #define ilu0_ ILU0 29 #define msrcsr_ MSRCSR 30 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 31 #define dperm_ dperm 32 #define ilutp_ ilutp 33 #define ilu0_ ilu0 34 #define msrcsr_ msrcsr 35 #endif 36 37 EXTERN_C_BEGIN 38 extern void dperm_(int*,double*,int*,int*,double*,int*,int*,int*,int*,int*); 39 extern void ilutp_(int*,double*,int*,int*,int*,double*,double*,int*,double*,int*,int*,int*,double*,int*,int*,int*); 40 extern void msrcsr_(int*,double*,int*,double*,int*,int*,double*,int*); 41 extern void ilu0_(int*,double*,int*,int*,double*,int*,int*,int*,int *); 42 EXTERN_C_END 43 44 #undef __FUNC__ 45 #define __FUNC__ "MatILUDTFactor_SeqAIJ" 46 /* ------------------------------------------------------------ 47 48 This interface was contribed by Tony Caola 49 50 This routine is an interface to the pivoting drop-tolerance 51 ILU routine written by Yousef Saad (saad@cs.umn.edu). It 52 was inspired by the non-pivoting iludt written by David 53 Hysom (hysom@cs.odu.edu). 54 55 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 56 help in getting this routine ironed out. 57 58 As of right now, there are a couple of things that could 59 be, uh, better. 60 61 1 - Since Saad's routine is Fortran based, memory cannot be 62 malloc'd. I was trying to get the expected fill from the 63 preconditioner and use this number as the multiplier in 64 the equation for jmax, below, but couldn't figure it out. 65 Anyway, perhaps a better solution is to run SPARSKIT through 66 f2c and incorporate mallocs(), but I want to graduate so I'll 67 just rebuild Petsc. . . 68 69 shift = 1, ishift = 0, for indices start at 1 70 shift = 0, ishift = 1, for indices starting at 0 71 ------------------------------------------------------------ 72 */ 73 74 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 75 { 76 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 77 IS iscolf, isrowf, isicol, isirow; 78 PetscTruth reorder; 79 int *c,*r,*ic,*ir, ierr, i, n = a->m; 80 int *old_i = a->i, *old_j = a->j, *new_i, *old_i2, *old_j2,*new_j; 81 int *ordcol, *ordrow,*iwk,*iperm, *jw; 82 int ishift = !a->indexshift,shift = -a->indexshift; 83 int jmax,lfill,job; 84 Scalar *old_a = a->a, *w, *new_a, *old_a2, *wk,permtol=0.0; 85 86 PetscFunctionBegin; 87 88 if (info->dt == PETSC_DEFAULT) info->dt = .005; 89 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int) (1.5*a->rmax); 90 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 91 if (info->fill == PETSC_DEFAULT) info->fill = (n*info->dtcount)/a->nz; 92 lfill = info->dtcount/2; 93 jmax = info->fill*a->nz; 94 permtol = info->dtcol; 95 96 ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 97 ierr = ISInvertPermutation(isrow,&isirow); CHKERRQ(ierr); 98 99 100 /* ------------------------------------------------------------ 101 If reorder=.TRUE., then the original matrix has to be 102 reordered to reflect the user selected ordering scheme, and 103 then de-reordered so it is in it's original format. 104 Because Saad's dperm() is NOT in place, we have to copy 105 the original matrix and allocate more storage. . . 106 ------------------------------------------------------------ 107 */ 108 109 /* set reorder to true if either isrow or iscol is not identity */ 110 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 111 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 112 reorder = PetscNot(reorder); 113 114 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 116 ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 117 ierr = ISGetIndices(isirow,&ir); CHKERRQ(ierr); 118 119 /* storage for ilu factor */ 120 new_i = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(new_i); 121 new_j = (int *) PetscMalloc(jmax*sizeof(int)); CHKPTRQ(new_j); 122 new_a = (Scalar *) PetscMalloc(jmax*sizeof(Scalar)); CHKPTRQ(new_a); 123 124 if (reorder) { 125 old_i2 = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(old_i2); 126 old_j2 = (int *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int)); CHKPTRQ(old_j2); 127 old_a2 = (Scalar *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar));CHKPTRQ(old_a2); 128 } 129 130 ordcol = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordcol); 131 ordrow = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordrow); 132 133 /* ------------------------------------------------------------ 134 Make sure that everything is Fortran formatted (1-Based) 135 ------------------------------------------------------------ 136 */ 137 for (i=old_i[0]-shift;i<old_i[n]-shift;i++) { 138 old_j[i] = old_j[i]+ishift; 139 if (reorder) { 140 old_j2[i] = old_j[i]; 141 old_a2[i] = old_a[i]; 142 } 143 }; 144 145 for(i=0;i<n+1;i++) { 146 old_i[i] = old_i[i]+ishift; 147 if (reorder) old_i2[i]=old_i[i]; 148 }; 149 150 for(i=0;i<n;i++) { 151 r[i] = r[i]+1; 152 c[i] = c[i]+1; 153 ir[i] = ir[i]+1; 154 ic[i] = ic[i]+1; 155 } 156 157 if (reorder) { 158 job = 3; 159 dperm_(&n,old_a2,old_j2,old_i2,old_a,old_j,old_i,r,c,&job); 160 } 161 162 /* ------------------------------------------------------------ 163 Call Saad's ilutp() routine to generate the factorization 164 ------------------------------------------------------------ 165 */ 166 167 iperm = (int *) PetscMalloc(2*n*sizeof(int)); CHKPTRQ(iperm); 168 jw = (int *) PetscMalloc(2*n*sizeof(int)); CHKPTRQ(jw); 169 w = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(w); 170 171 172 ilutp_(&n,old_a,old_j,old_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 173 if (ierr) { 174 switch (ierr) { 175 case -3: SETERRQ1(1,1,"ilutp(), matrix U overflows, need larger info->fill value %d",jmax); 176 break; 177 case -2: SETERRQ1(1,1,"ilutp(), matrix L overflows, need larger info->fill value %d",jmax); 178 break; 179 case -5: SETERRQ(1,1,"ilutp(), zero row encountered"); 180 break; 181 case -1: SETERRQ(1,1,"ilutp(), input matrix may be wrong"); 182 break; 183 case -4: SETERRQ1(1,1,"ilutp(), illegal info->fill value %d",jmax); 184 break; 185 default: SETERRQ1(1,1,"ilutp(), zero pivot detected on row %d",ierr); 186 } 187 } 188 189 ierr = PetscFree(w);CHKERRQ(ierr); 190 ierr = PetscFree(jw);CHKERRQ(ierr); 191 192 193 /* ------------------------------------------------------------ 194 Saad's routine gives the result in Modified Sparse Row (msr) 195 Convert to Compressed Sparse Row format (csr) 196 ------------------------------------------------------------ 197 */ 198 199 wk = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(wk); 200 iwk = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(iwk); 201 202 msrcsr_(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 203 204 ierr = PetscFree(iwk);CHKERRQ(ierr); 205 ierr = PetscFree(wk);CHKERRQ(ierr); 206 207 for(i=old_i[0]; i<=old_i[n]; i++) { 208 old_j[i-1] = iperm[old_j[i-1]-1]; 209 if (reorder) { 210 old_j2[i-1] = old_j[i-1]; 211 old_a2[i-1] = old_a[i-1]; 212 } 213 }; 214 215 if (reorder) { 216 for(i=0; i<n+1; i++) { 217 old_i2[i]=old_i[i]; 218 }; 219 220 job = 3; 221 dperm_(&n,old_a2,old_j2,old_i2,old_a,old_j,old_i,ir,ic,&job); 222 ierr = PetscFree(old_a2);CHKERRQ(ierr); 223 ierr = PetscFree(old_j2);CHKERRQ(ierr); 224 ierr = PetscFree(old_i2);CHKERRQ(ierr); 225 } 226 227 /* get rid of the shift to indices starting at 1 */ 228 for (i=0; i<n+1; i++) { 229 old_i[i] = old_i[i]-ishift; 230 } 231 232 for (i=old_i[0]-shift;i<=old_i[n]-shift;i++) { 233 old_j[i-1] = old_j[i-1]-ishift; 234 } 235 236 /* Make the return matrix 0-based */ 237 238 for (i=new_i[0]-shift;i<=new_i[n]-shift;i++) { 239 new_j[i-1] = new_j[i-1]-ishift; 240 } 241 242 for (i=0; i<n+1; i++) { 243 new_i[i] = new_i[i]-ishift; 244 } 245 246 for (i=0;i<n;i++) { 247 r[i] = r[i]-1; 248 c[i] = c[i]-1; 249 ir[i] = ir[i]-1; 250 ic[i] = ic[i]-1; 251 } 252 253 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 254 /*-- permute the right-hand-side and solution vectors --*/ 255 for(i=0; i<n; i++) { 256 ordcol[i] = ic[iperm[i]-1]; 257 ordrow[i] = ir[i]; 258 }; 259 260 ierr = PetscFree(iperm);CHKERRQ(ierr); 261 262 ierr = ISRestoreIndices(iscol,&c); CHKERRQ(ierr); 263 ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 264 265 ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 266 ierr = ISRestoreIndices(isirow,&ir); CHKERRQ(ierr); 267 268 ierr = ISCreateGeneral(PETSC_COMM_SELF, n, ordcol, &iscolf); 269 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordrow,&isrowf); 270 271 /*----- put together the new matrix -----*/ 272 273 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact); CHKERRQ(ierr); 274 (*fact)->factor = FACTOR_LU; 275 (*fact)->assembled = PETSC_TRUE; 276 277 b = (Mat_SeqAIJ *) (*fact)->data; 278 ierr = PetscFree(b->imax);CHKERRQ(ierr); 279 b->sorted = PETSC_FALSE; 280 b->singlemalloc = PETSC_TRUE; 281 /* the next line frees the default space generated by the Create() */ 282 ierr = PetscFree(b->a);CHKERRQ(ierr); 283 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 284 b->a = new_a; 285 b->j = new_j; 286 b->i = new_i; 287 b->ilen = 0; 288 b->imax = 0; 289 b->row = isrowf; 290 b->col = iscolf; 291 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 292 b->maxnz = b->nz = new_i[n]; 293 b->indexshift = a->indexshift; 294 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 295 (*fact)->info.factor_mallocs = 0; 296 297 PLogFlops(b->n); 298 299 a->j = old_j; 300 a->i = old_i; 301 a->a = old_a; 302 a->sorted = PETSC_FALSE; 303 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 304 305 /* check out for identical nodes. If found, use inode functions */ 306 ierr = Mat_AIJ_CheckInode(*fact);CHKERRQ(ierr); 307 308 PetscFunctionReturn(0); 309 } 310 #else 311 #undef __FUNC__ 312 #define __FUNC__ "MatILUDTFactor_SeqAIJ" 313 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 314 { 315 PetscFunctionBegin; 316 SETERRQ(1,1,"You must install Saad's ILUDT to use this"); 317 } 318 #endif 319 320 /* 321 Factorization code for AIJ format. 322 */ 323 #undef __FUNC__ 324 #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ" 325 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B) 326 { 327 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 328 IS isicol; 329 int *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j; 330 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift; 331 int *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im; 332 333 PetscFunctionBegin; 334 PetscValidHeaderSpecific(isrow,IS_COOKIE); 335 PetscValidHeaderSpecific(iscol,IS_COOKIE); 336 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square"); 337 338 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 339 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 340 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 341 342 /* get new row pointers */ 343 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 344 ainew[0] = -shift; 345 /* don't know how many column pointers are needed so estimate */ 346 jmax = (int) (f*ai[n]+(!shift)); 347 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 348 /* fill is a linked list of nonzeros in active row */ 349 fill = (int *) PetscMalloc( (2*n+1)*sizeof(int));CHKPTRQ(fill); 350 im = fill + n + 1; 351 /* idnew is location of diagonal in factor */ 352 idnew = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(idnew); 353 idnew[0] = -shift; 354 355 for ( i=0; i<n; i++ ) { 356 /* first copy previous fill into linked list */ 357 nnz = nz = ai[r[i]+1] - ai[r[i]]; 358 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 359 ajtmp = aj + ai[r[i]] + shift; 360 fill[n] = n; 361 while (nz--) { 362 fm = n; 363 idx = ic[*ajtmp++ + shift]; 364 do { 365 m = fm; 366 fm = fill[m]; 367 } while (fm < idx); 368 fill[m] = idx; 369 fill[idx] = fm; 370 } 371 row = fill[n]; 372 while ( row < i ) { 373 ajtmp = ajnew + idnew[row] + (!shift); 374 nzbd = 1 + idnew[row] - ainew[row]; 375 nz = im[row] - nzbd; 376 fm = row; 377 while (nz-- > 0) { 378 idx = *ajtmp++ + shift; 379 nzbd++; 380 if (idx == i) im[row] = nzbd; 381 do { 382 m = fm; 383 fm = fill[m]; 384 } while (fm < idx); 385 if (fm != idx) { 386 fill[m] = idx; 387 fill[idx] = fm; 388 fm = idx; 389 nnz++; 390 } 391 } 392 row = fill[row]; 393 } 394 /* copy new filled row into permanent storage */ 395 ainew[i+1] = ainew[i] + nnz; 396 if (ainew[i+1] > jmax) { 397 398 /* estimate how much additional space we will need */ 399 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 400 /* just double the memory each time */ 401 int maxadd = jmax; 402 /* maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n); */ 403 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 404 jmax += maxadd; 405 406 /* allocate a longer ajnew */ 407 ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp); 408 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 409 ierr = PetscFree(ajnew);CHKERRQ(ierr); 410 ajnew = ajtmp; 411 realloc++; /* count how many times we realloc */ 412 } 413 ajtmp = ajnew + ainew[i] + shift; 414 fm = fill[n]; 415 nzi = 0; 416 im[i] = nnz; 417 while (nnz--) { 418 if (fm < i) nzi++; 419 *ajtmp++ = fm - shift; 420 fm = fill[fm]; 421 } 422 idnew[i] = ainew[i] + nzi; 423 } 424 if (ai[n] != 0) { 425 double af = ((double)ainew[n])/((double)ai[n]); 426 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 427 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 428 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 429 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 430 } else { 431 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 432 } 433 434 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 435 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 436 437 ierr = PetscFree(fill);CHKERRQ(ierr); 438 439 /* put together the new matrix */ 440 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 441 PLogObjectParent(*B,isicol); 442 b = (Mat_SeqAIJ *) (*B)->data; 443 ierr = PetscFree(b->imax);CHKERRQ(ierr); 444 b->singlemalloc = PETSC_FALSE; 445 /* the next line frees the default space generated by the Create() */ 446 ierr = PetscFree(b->a);CHKERRQ(ierr); 447 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 448 b->a = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a); 449 b->j = ajnew; 450 b->i = ainew; 451 b->diag = idnew; 452 b->ilen = 0; 453 b->imax = 0; 454 b->row = isrow; 455 b->col = iscol; 456 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 457 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 458 b->icol = isicol; 459 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 460 /* In b structure: Free imax, ilen, old a, old j. 461 Allocate idnew, solve_work, new a, new j */ 462 PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 463 b->maxnz = b->nz = ainew[n] + shift; 464 465 (*B)->factor = FACTOR_LU;; 466 (*B)->info.factor_mallocs = realloc; 467 (*B)->info.fill_ratio_given = f; 468 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant if A has inodes */ 469 470 if (ai[n] != 0) { 471 (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[n]); 472 } else { 473 (*B)->info.fill_ratio_needed = 0.0; 474 } 475 PetscFunctionReturn(0); 476 } 477 /* ----------------------------------------------------------- */ 478 extern int Mat_AIJ_CheckInode(Mat); 479 480 #undef __FUNC__ 481 #define __FUNC__ "MatLUFactorNumeric_SeqAIJ" 482 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 483 { 484 Mat C = *B; 485 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data; 486 IS isrow = b->row, isicol = b->icol; 487 int *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j; 488 int *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift; 489 int *diag_offset = b->diag,diag,k; 490 int preserve_row_sums = (int) a->ilu_preserve_row_sums; 491 register int *pj; 492 Scalar *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0; 493 double ssum; 494 register Scalar *pv, *rtmps,*u_values; 495 496 PetscFunctionBegin; 497 498 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 499 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 500 rtmp = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) );CHKPTRQ(rtmp); 501 ierr = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr); 502 rtmps = rtmp + shift; ics = ic + shift; 503 504 /* precalculate row sums */ 505 if (preserve_row_sums) { 506 rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) );CHKPTRQ(rowsums); 507 for ( i=0; i<n; i++ ) { 508 nz = a->i[r[i]+1] - a->i[r[i]]; 509 v = a->a + a->i[r[i]] + shift; 510 sum = 0.0; 511 for ( j=0; j<nz; j++ ) sum += v[j]; 512 rowsums[i] = sum; 513 } 514 } 515 516 for ( i=0; i<n; i++ ) { 517 nz = ai[i+1] - ai[i]; 518 ajtmp = aj + ai[i] + shift; 519 for ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0; 520 521 /* load in initial (unfactored row) */ 522 nz = a->i[r[i]+1] - a->i[r[i]]; 523 ajtmpold = a->j + a->i[r[i]] + shift; 524 v = a->a + a->i[r[i]] + shift; 525 for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] = v[j]; 526 527 row = *ajtmp++ + shift; 528 while (row < i ) { 529 pc = rtmp + row; 530 if (*pc != 0.0) { 531 pv = b->a + diag_offset[row] + shift; 532 pj = b->j + diag_offset[row] + (!shift); 533 multiplier = *pc / *pv++; 534 *pc = multiplier; 535 nz = ai[row+1] - diag_offset[row] - 1; 536 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 537 PLogFlops(2*nz); 538 } 539 row = *ajtmp++ + shift; 540 } 541 /* finished row so stick it into b->a */ 542 pv = b->a + ai[i] + shift; 543 pj = b->j + ai[i] + shift; 544 nz = ai[i+1] - ai[i]; 545 for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];} 546 diag = diag_offset[i] - ai[i]; 547 /* 548 Possibly adjust diagonal entry on current row to force 549 LU matrix to have same row sum as initial matrix. 550 */ 551 if (pv[diag] == 0.0) { 552 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i); 553 } 554 if (preserve_row_sums) { 555 pj = b->j + ai[i] + shift; 556 sum = rowsums[i]; 557 for ( j=0; j<diag; j++ ) { 558 u_values = b->a + diag_offset[pj[j]] + shift; 559 nz = ai[pj[j]+1] - diag_offset[pj[j]]; 560 inner_sum = 0.0; 561 for ( k=0; k<nz; k++ ) { 562 inner_sum += u_values[k]; 563 } 564 sum -= pv[j]*inner_sum; 565 566 } 567 nz = ai[i+1] - diag_offset[i] - 1; 568 u_values = b->a + diag_offset[i] + 1 + shift; 569 for ( k=0; k<nz; k++ ) { 570 sum -= u_values[k]; 571 } 572 ssum = PetscAbsScalar(sum/pv[diag]); 573 if (ssum < 1000. && ssum > .001) pv[diag] = sum; 574 } 575 /* check pivot entry for current row */ 576 } 577 578 /* invert diagonal entries for simplier triangular solves */ 579 for ( i=0; i<n; i++ ) { 580 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 581 } 582 583 if (preserve_row_sums) {ierr = PetscFree(rowsums);CHKERRQ(ierr);} 584 ierr = PetscFree(rtmp);CHKERRQ(ierr); 585 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 586 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 587 C->factor = FACTOR_LU; 588 ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr); 589 C->assembled = PETSC_TRUE; 590 PLogFlops(b->n); 591 PetscFunctionReturn(0); 592 } 593 /* ----------------------------------------------------------- */ 594 #undef __FUNC__ 595 #define __FUNC__ "MatLUFactor_SeqAIJ" 596 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f) 597 { 598 Mat_SeqAIJ *mat = (Mat_SeqAIJ *) A->data; 599 int ierr,refct; 600 Mat C; 601 PetscOps *Abops; 602 MatOps Aops; 603 604 PetscFunctionBegin; 605 ierr = MatLUFactorSymbolic(A,row,col,f,&C);CHKERRQ(ierr); 606 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 607 608 /* free all the data structures from mat */ 609 ierr = PetscFree(mat->a);CHKERRQ(ierr); 610 if (!mat->singlemalloc) { 611 ierr = PetscFree(mat->i);CHKERRQ(ierr); 612 ierr = PetscFree(mat->j);CHKERRQ(ierr); 613 } 614 if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);} 615 if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);} 616 if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);} 617 if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);} 618 if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);} 619 if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);} 620 ierr = PetscFree(mat);CHKERRQ(ierr); 621 622 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 623 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 624 625 /* 626 This is horrible, horrible code. We need to keep the 627 A pointers for the bops and ops but copy everything 628 else from C. 629 */ 630 Abops = A->bops; 631 Aops = A->ops; 632 refct = A->refct; 633 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 634 mat = (Mat_SeqAIJ *) A->data; 635 PLogObjectParent(A,mat->icol); 636 637 A->bops = Abops; 638 A->ops = Aops; 639 A->qlist = 0; 640 A->refct = refct; 641 /* copy over the type_name and name */ 642 ierr = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr); 643 ierr = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr); 644 645 PetscHeaderDestroy(C); 646 647 PetscFunctionReturn(0); 648 } 649 /* ----------------------------------------------------------- */ 650 #undef __FUNC__ 651 #define __FUNC__ "MatSolve_SeqAIJ" 652 int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx) 653 { 654 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 655 IS iscol = a->col, isrow = a->row; 656 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 657 int nz,shift = a->indexshift,*rout,*cout; 658 Scalar *x,*b,*tmp, *tmps, *aa = a->a, sum, *v; 659 660 PetscFunctionBegin; 661 if (!n) PetscFunctionReturn(0); 662 663 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 664 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 665 tmp = a->solve_work; 666 667 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 668 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 669 670 /* forward solve the lower triangular */ 671 tmp[0] = b[*r++]; 672 tmps = tmp + shift; 673 for ( i=1; i<n; i++ ) { 674 v = aa + ai[i] + shift; 675 vi = aj + ai[i] + shift; 676 nz = a->diag[i] - ai[i]; 677 sum = b[*r++]; 678 while (nz--) sum -= *v++ * tmps[*vi++]; 679 tmp[i] = sum; 680 } 681 682 /* backward solve the upper triangular */ 683 for ( i=n-1; i>=0; i-- ){ 684 v = aa + a->diag[i] + (!shift); 685 vi = aj + a->diag[i] + (!shift); 686 nz = ai[i+1] - a->diag[i] - 1; 687 sum = tmp[i]; 688 while (nz--) sum -= *v++ * tmps[*vi++]; 689 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 690 } 691 692 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 693 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 694 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 695 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 696 PLogFlops(2*a->nz - a->n); 697 PetscFunctionReturn(0); 698 } 699 700 /* ----------------------------------------------------------- */ 701 #undef __FUNC__ 702 #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering" 703 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb, Vec xx) 704 { 705 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 706 int n = a->m, *ai = a->i, *aj = a->j, *adiag = a->diag,ierr; 707 Scalar *x,*b, *aa = a->a, sum; 708 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 709 int adiag_i,i,*vi,nz,ai_i; 710 Scalar *v; 711 #endif 712 713 PetscFunctionBegin; 714 if (!n) PetscFunctionReturn(0); 715 if (a->indexshift) { 716 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 717 PetscFunctionReturn(0); 718 } 719 720 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 721 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 722 723 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 724 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 725 #else 726 /* forward solve the lower triangular */ 727 x[0] = b[0]; 728 for ( i=1; i<n; i++ ) { 729 ai_i = ai[i]; 730 v = aa + ai_i; 731 vi = aj + ai_i; 732 nz = adiag[i] - ai_i; 733 sum = b[i]; 734 while (nz--) sum -= *v++ * x[*vi++]; 735 x[i] = sum; 736 } 737 738 /* backward solve the upper triangular */ 739 for ( i=n-1; i>=0; i-- ){ 740 adiag_i = adiag[i]; 741 v = aa + adiag_i + 1; 742 vi = aj + adiag_i + 1; 743 nz = ai[i+1] - adiag_i - 1; 744 sum = x[i]; 745 while (nz--) sum -= *v++ * x[*vi++]; 746 x[i] = sum*aa[adiag_i]; 747 } 748 #endif 749 PLogFlops(2*a->nz - a->n); 750 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 751 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 752 PetscFunctionReturn(0); 753 } 754 755 #undef __FUNC__ 756 #define __FUNC__ "MatSolveAdd_SeqAIJ" 757 int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx) 758 { 759 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 760 IS iscol = a->col, isrow = a->row; 761 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 762 int nz, shift = a->indexshift,*rout,*cout; 763 Scalar *x,*b,*tmp, *aa = a->a, sum, *v; 764 765 PetscFunctionBegin; 766 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 767 768 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 769 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 770 tmp = a->solve_work; 771 772 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 773 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 774 775 /* forward solve the lower triangular */ 776 tmp[0] = b[*r++]; 777 for ( i=1; i<n; i++ ) { 778 v = aa + ai[i] + shift; 779 vi = aj + ai[i] + shift; 780 nz = a->diag[i] - ai[i]; 781 sum = b[*r++]; 782 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 783 tmp[i] = sum; 784 } 785 786 /* backward solve the upper triangular */ 787 for ( i=n-1; i>=0; i-- ){ 788 v = aa + a->diag[i] + (!shift); 789 vi = aj + a->diag[i] + (!shift); 790 nz = ai[i+1] - a->diag[i] - 1; 791 sum = tmp[i]; 792 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 793 tmp[i] = sum*aa[a->diag[i]+shift]; 794 x[*c--] += tmp[i]; 795 } 796 797 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 798 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 799 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 800 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 801 PLogFlops(2*a->nz); 802 803 PetscFunctionReturn(0); 804 } 805 /* -------------------------------------------------------------------*/ 806 #undef __FUNC__ 807 #define __FUNC__ "MatSolveTranspose_SeqAIJ" 808 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb, Vec xx) 809 { 810 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 811 IS iscol = a->col, isrow = a->row; 812 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 813 int nz,shift = a->indexshift,*rout,*cout, *diag = a->diag; 814 Scalar *x,*b,*tmp, *aa = a->a, *v, s1; 815 816 PetscFunctionBegin; 817 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 818 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 819 tmp = a->solve_work; 820 821 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 822 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 823 824 /* copy the b into temp work space according to permutation */ 825 for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 826 827 /* forward solve the U^T */ 828 for ( i=0; i<n; i++ ) { 829 v = aa + diag[i] + shift; 830 vi = aj + diag[i] + (!shift); 831 nz = ai[i+1] - diag[i] - 1; 832 s1 = tmp[i]; 833 s1 *= *(v++); /* multiply by inverse of diagonal entry */ 834 while (nz--) { 835 tmp[*vi++ + shift] -= (*v++)*s1; 836 } 837 tmp[i] = s1; 838 } 839 840 /* backward solve the L^T */ 841 for ( i=n-1; i>=0; i-- ){ 842 v = aa + diag[i] - 1 + shift; 843 vi = aj + diag[i] - 1 + shift; 844 nz = diag[i] - ai[i]; 845 s1 = tmp[i]; 846 while (nz--) { 847 tmp[*vi-- + shift] -= (*v--)*s1; 848 } 849 } 850 851 /* copy tmp into x according to permutation */ 852 for ( i=0; i<n; i++ ) x[r[i]] = tmp[i]; 853 854 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 855 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 856 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 857 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 858 859 PLogFlops(2*a->nz-a->n); 860 PetscFunctionReturn(0); 861 } 862 863 #undef __FUNC__ 864 #define __FUNC__ "MatSolveTransposeAdd_SeqAIJ" 865 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx) 866 { 867 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 868 IS iscol = a->col, isrow = a->row; 869 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 870 int nz,shift = a->indexshift, *rout, *cout, *diag = a->diag; 871 Scalar *x,*b,*tmp, *aa = a->a, *v; 872 873 PetscFunctionBegin; 874 if (zz != xx) VecCopy(zz,xx); 875 876 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 877 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 878 tmp = a->solve_work; 879 880 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 881 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 882 883 /* copy the b into temp work space according to permutation */ 884 for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 885 886 /* forward solve the U^T */ 887 for ( i=0; i<n; i++ ) { 888 v = aa + diag[i] + shift; 889 vi = aj + diag[i] + (!shift); 890 nz = ai[i+1] - diag[i] - 1; 891 tmp[i] *= *v++; 892 while (nz--) { 893 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 894 } 895 } 896 897 /* backward solve the L^T */ 898 for ( i=n-1; i>=0; i-- ){ 899 v = aa + diag[i] - 1 + shift; 900 vi = aj + diag[i] - 1 + shift; 901 nz = diag[i] - ai[i]; 902 while (nz--) { 903 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 904 } 905 } 906 907 /* copy tmp into x according to permutation */ 908 for ( i=0; i<n; i++ ) x[r[i]] += tmp[i]; 909 910 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 911 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 912 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 913 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 914 915 PLogFlops(2*a->nz); 916 PetscFunctionReturn(0); 917 } 918 /* ----------------------------------------------------------------*/ 919 extern int MatMissingDiagonal_SeqAIJ(Mat); 920 921 #undef __FUNC__ 922 #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ" 923 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 924 { 925 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 926 IS isicol; 927 int *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j; 928 int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 929 int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0, dcount = 0; 930 int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 931 PetscTruth col_identity, row_identity; 932 double f; 933 934 PetscFunctionBegin; 935 if (info) { 936 f = info->fill; 937 levels = (int) info->levels; 938 diagonal_fill = (int) info->diagonal_fill; 939 } else { 940 f = 1.0; 941 levels = 0; 942 diagonal_fill = 0; 943 } 944 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 945 946 /* special case that simply copies fill pattern */ 947 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 948 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 949 if (!levels && row_identity && col_identity) { 950 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 951 (*fact)->factor = FACTOR_LU; 952 b = (Mat_SeqAIJ *) (*fact)->data; 953 if (!b->diag) { 954 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 955 } 956 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 957 b->row = isrow; 958 b->col = iscol; 959 b->icol = isicol; 960 b->solve_work = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 961 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 962 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 963 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 964 PetscFunctionReturn(0); 965 } 966 967 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 968 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 969 970 /* get new row pointers */ 971 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 972 ainew[0] = -shift; 973 /* don't know how many column pointers are needed so estimate */ 974 jmax = (int) (f*(ai[n]+!shift)); 975 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 976 /* ajfill is level of fill for each fill entry */ 977 ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill); 978 /* fill is a linked list of nonzeros in active row */ 979 fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill); 980 /* im is level for each filled value */ 981 im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im); 982 /* dloc is location of diagonal in factor */ 983 dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc); 984 dloc[0] = 0; 985 for ( prow=0; prow<n; prow++ ) { 986 987 /* copy current row into linked list */ 988 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 989 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 990 xi = aj + ai[r[prow]] + shift; 991 fill[n] = n; 992 fill[prow] = -1; /* marker to indicate if diagonal exists */ 993 while (nz--) { 994 fm = n; 995 idx = ic[*xi++ + shift]; 996 do { 997 m = fm; 998 fm = fill[m]; 999 } while (fm < idx); 1000 fill[m] = idx; 1001 fill[idx] = fm; 1002 im[idx] = 0; 1003 } 1004 1005 /* make sure diagonal entry is included */ 1006 if (diagonal_fill && fill[prow] == -1) { 1007 fm = n; 1008 while (fill[fm] < prow) fm = fill[fm]; 1009 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 1010 fill[fm] = prow; 1011 im[prow] = 0; 1012 nzf++; 1013 dcount++; 1014 } 1015 1016 nzi = 0; 1017 row = fill[n]; 1018 while ( row < prow ) { 1019 incrlev = im[row] + 1; 1020 nz = dloc[row]; 1021 xi = ajnew + ainew[row] + shift + nz + 1; 1022 flev = ajfill + ainew[row] + shift + nz + 1; 1023 nnz = ainew[row+1] - ainew[row] - nz - 1; 1024 fm = row; 1025 while (nnz-- > 0) { 1026 idx = *xi++ + shift; 1027 if (*flev + incrlev > levels) { 1028 flev++; 1029 continue; 1030 } 1031 do { 1032 m = fm; 1033 fm = fill[m]; 1034 } while (fm < idx); 1035 if (fm != idx) { 1036 im[idx] = *flev + incrlev; 1037 fill[m] = idx; 1038 fill[idx] = fm; 1039 fm = idx; 1040 nzf++; 1041 } else { 1042 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 1043 } 1044 flev++; 1045 } 1046 row = fill[row]; 1047 nzi++; 1048 } 1049 /* copy new filled row into permanent storage */ 1050 ainew[prow+1] = ainew[prow] + nzf; 1051 if (ainew[prow+1] > jmax-shift) { 1052 1053 /* estimate how much additional space we will need */ 1054 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1055 /* just double the memory each time */ 1056 /* maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1057 int maxadd = jmax; 1058 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1059 jmax += maxadd; 1060 1061 /* allocate a longer ajnew and ajfill */ 1062 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1063 ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1064 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1065 ajnew = xi; 1066 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1067 ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1068 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1069 ajfill = xi; 1070 realloc++; /* count how many times we realloc */ 1071 } 1072 xi = ajnew + ainew[prow] + shift; 1073 flev = ajfill + ainew[prow] + shift; 1074 dloc[prow] = nzi; 1075 fm = fill[n]; 1076 while (nzf--) { 1077 *xi++ = fm - shift; 1078 *flev++ = im[fm]; 1079 fm = fill[fm]; 1080 } 1081 /* make sure row has diagonal entry */ 1082 if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 1083 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 1084 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1085 } 1086 } 1087 ierr = PetscFree(ajfill); CHKERRQ(ierr); 1088 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1089 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1090 ierr = PetscFree(fill);CHKERRQ(ierr); 1091 ierr = PetscFree(im);CHKERRQ(ierr); 1092 1093 { 1094 double af = ((double)ainew[n])/((double)ai[n]); 1095 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1096 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 1097 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 1098 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1099 if (diagonal_fill) { 1100 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount); 1101 } 1102 } 1103 1104 /* put together the new matrix */ 1105 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1106 PLogObjectParent(*fact,isicol); 1107 b = (Mat_SeqAIJ *) (*fact)->data; 1108 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1109 b->singlemalloc = PETSC_FALSE; 1110 len = (ainew[n] + shift)*sizeof(Scalar); 1111 /* the next line frees the default space generated by the Create() */ 1112 ierr = PetscFree(b->a);CHKERRQ(ierr); 1113 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1114 b->a = (Scalar *) PetscMalloc( len+1 );CHKPTRQ(b->a); 1115 b->j = ajnew; 1116 b->i = ainew; 1117 for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 1118 b->diag = dloc; 1119 b->ilen = 0; 1120 b->imax = 0; 1121 b->row = isrow; 1122 b->col = iscol; 1123 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1124 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1125 b->icol = isicol; 1126 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1127 /* In b structure: Free imax, ilen, old a, old j. 1128 Allocate dloc, solve_work, new a, new j */ 1129 PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 1130 b->maxnz = b->nz = ainew[n] + shift; 1131 (*fact)->factor = FACTOR_LU; 1132 1133 (*fact)->info.factor_mallocs = realloc; 1134 (*fact)->info.fill_ratio_given = f; 1135 (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]); 1136 (*fact)->factor = FACTOR_LU;; 1137 1138 PetscFunctionReturn(0); 1139 } 1140 1141 1142 1143 1144