1 /*$Id: aijfact.c,v 1.134 1999/12/16 23:31:37 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 7 #undef __FUNC__ 8 #define __FUNC__ "MatOrdering_Flow_SeqAIJ" 9 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 10 { 11 PetscFunctionBegin; 12 13 SETERRQ(PETSC_ERR_SUP,0,"Code not written"); 14 #if !defined(PETSC_USE_DEBUG) 15 PetscFunctionReturn(0); 16 #endif 17 } 18 19 20 extern int MatMarkDiagonal_SeqAIJ(Mat); 21 extern int Mat_AIJ_CheckInode(Mat); 22 23 #if !defined(PETSC_USE_COMPLEX) && defined(PETSC_HAVE_SAADILUDT) 24 25 #if defined(PETSC_HAVE_FORTRAN_CAPS) 26 #define dperm_ DPERM 27 #define ilutp_ ILUTP 28 #define ilu0_ ILU0 29 #define msrcsr_ MSRCSR 30 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 31 #define dperm_ dperm 32 #define ilutp_ ilutp 33 #define ilu0_ ilu0 34 #define msrcsr_ msrcsr 35 #endif 36 37 EXTERN_C_BEGIN 38 extern void dperm_(int*,double*,int*,int*,double*,int*,int*,int*,int*,int*); 39 extern void ilutp_(int*,double*,int*,int*,int*,double*,double*,int*,double*,int*,int*,int*,double*,int*,int*,int*); 40 extern void msrcsr_(int*,double*,int*,double*,int*,int*,double*,int*); 41 extern void ilu0_(int*,double*,int*,int*,double*,int*,int*,int*,int *); 42 EXTERN_C_END 43 44 #undef __FUNC__ 45 #define __FUNC__ "MatILUDTFactor_SeqAIJ" 46 /* ------------------------------------------------------------ 47 48 This interface was contribed by Tony Caola 49 50 This routine is an interface to the pivoting drop-tolerance 51 ILU routine written by Yousef Saad (saad@cs.umn.edu). It 52 was inspired by the non-pivoting iludt written by David 53 Hysom (hysom@cs.odu.edu). 54 55 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 56 help in getting this routine ironed out. 57 58 As of right now, there are a couple of things that could 59 be, uh, better. 60 61 1 - Since Saad's routine is Fortran based, memory cannot be 62 malloc'd. I was trying to get the expected fill from the 63 preconditioner and use this number as the multiplier in 64 the equation for jmax, below, but couldn't figure it out. 65 Anyway, perhaps a better solution is to run SPARSKIT through 66 f2c and incorporate mallocs(), but I want to graduate so I'll 67 just rebuild Petsc. . . 68 69 shift = 1, ishift = 0, for indices start at 1 70 shift = 0, ishift = 1, for indices starting at 0 71 ------------------------------------------------------------ 72 */ 73 74 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 75 { 76 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 77 IS iscolf, isicol, isirow; 78 PetscTruth reorder; 79 int *c,*r,*ic,*ir, ierr, i, n = a->m; 80 int *old_i = a->i, *old_j = a->j, *new_i, *old_i2, *old_j2,*new_j; 81 int *ordcol, *iwk,*iperm, *jw; 82 int ishift = !a->indexshift,shift = -a->indexshift; 83 int jmax,lfill,job,*o_i,*o_j; 84 Scalar *old_a = a->a, *w, *new_a, *old_a2, *wk,permtol=0.0,*o_a; 85 86 PetscFunctionBegin; 87 88 if (info->dt == PETSC_DEFAULT) info->dt = .005; 89 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int) (1.5*a->rmax); 90 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 91 if (info->fill == PETSC_DEFAULT) info->fill = (n*info->dtcount)/a->nz; 92 lfill = info->dtcount/2; 93 jmax = info->fill*a->nz; 94 permtol = info->dtcol; 95 96 97 /* ------------------------------------------------------------ 98 If reorder=.TRUE., then the original matrix has to be 99 reordered to reflect the user selected ordering scheme, and 100 then de-reordered so it is in it's original format. 101 Because Saad's dperm() is NOT in place, we have to copy 102 the original matrix and allocate more storage. . . 103 ------------------------------------------------------------ 104 */ 105 106 /* set reorder to true if either isrow or iscol is not identity */ 107 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 108 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 109 reorder = PetscNot(reorder); 110 111 112 /* storage for ilu factor */ 113 new_i = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(new_i); 114 new_j = (int *) PetscMalloc(jmax*sizeof(int)); CHKPTRQ(new_j); 115 new_a = (Scalar *) PetscMalloc(jmax*sizeof(Scalar)); CHKPTRQ(new_a); 116 117 ordcol = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordcol); 118 119 /* ------------------------------------------------------------ 120 Make sure that everything is Fortran formatted (1-Based) 121 ------------------------------------------------------------ 122 */ 123 if (ishift) { 124 for (i=old_i[0];i<old_i[n];i++) { 125 old_j[i]++; 126 } 127 for(i=0;i<n+1;i++) { 128 old_i[i]++; 129 }; 130 }; 131 132 if (reorder) { 133 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 134 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 135 for(i=0;i<n;i++) { 136 r[i] = r[i]+1; 137 c[i] = c[i]+1; 138 } 139 old_i2 = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(old_i2); 140 old_j2 = (int *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int)); CHKPTRQ(old_j2); 141 old_a2 = (Scalar *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar));CHKPTRQ(old_a2); 142 job = 3; dperm_(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 143 for (i=0;i<n;i++) { 144 r[i] = r[i]-1; 145 c[i] = c[i]-1; 146 } 147 ierr = ISRestoreIndices(iscol,&c); CHKERRQ(ierr); 148 ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 149 o_a = old_a2; 150 o_j = old_j2; 151 o_i = old_i2; 152 } else { 153 o_a = old_a; 154 o_j = old_j; 155 o_i = old_i; 156 } 157 158 /* ------------------------------------------------------------ 159 Call Saad's ilutp() routine to generate the factorization 160 ------------------------------------------------------------ 161 */ 162 163 iperm = (int *) PetscMalloc(2*n*sizeof(int)); CHKPTRQ(iperm); 164 jw = (int *) PetscMalloc(2*n*sizeof(int)); CHKPTRQ(jw); 165 w = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(w); 166 167 ilutp_(&n,o_a,o_j,o_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 168 if (ierr) { 169 switch (ierr) { 170 case -3: SETERRQ1(1,1,"ilutp(), matrix U overflows, need larger info->fill value %d",jmax); 171 break; 172 case -2: SETERRQ1(1,1,"ilutp(), matrix L overflows, need larger info->fill value %d",jmax); 173 break; 174 case -5: SETERRQ(1,1,"ilutp(), zero row encountered"); 175 break; 176 case -1: SETERRQ(1,1,"ilutp(), input matrix may be wrong"); 177 break; 178 case -4: SETERRQ1(1,1,"ilutp(), illegal info->fill value %d",jmax); 179 break; 180 default: SETERRQ1(1,1,"ilutp(), zero pivot detected on row %d",ierr); 181 } 182 } 183 184 ierr = PetscFree(w);CHKERRQ(ierr); 185 ierr = PetscFree(jw);CHKERRQ(ierr); 186 187 /* ------------------------------------------------------------ 188 Saad's routine gives the result in Modified Sparse Row (msr) 189 Convert to Compressed Sparse Row format (csr) 190 ------------------------------------------------------------ 191 */ 192 193 wk = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(wk); 194 iwk = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(iwk); 195 196 msrcsr_(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 197 198 ierr = PetscFree(iwk);CHKERRQ(ierr); 199 ierr = PetscFree(wk);CHKERRQ(ierr); 200 201 if (reorder) { 202 ierr = PetscFree(old_a2);CHKERRQ(ierr); 203 ierr = PetscFree(old_j2);CHKERRQ(ierr); 204 ierr = PetscFree(old_i2);CHKERRQ(ierr); 205 } else { 206 /* fix permutation of old_j that the factorization introduced */ 207 for (i=old_i[0]; i<=old_i[n]; i++) { 208 old_j[i-1] = iperm[old_j[i-1]-1]; 209 } 210 } 211 212 /* get rid of the shift to indices starting at 1 */ 213 if (ishift) { 214 for (i=0; i<n+1; i++) { 215 old_i[i]--; 216 } 217 for (i=old_i[0];i<old_i[n];i++) { 218 old_j[i]--; 219 } 220 } 221 222 /* Make the factored matrix 0-based */ 223 if (ishift) { 224 for (i=0; i<n+1; i++) { 225 new_i[i]--; 226 } 227 for (i=new_i[0];i<new_i[n];i++) { 228 new_j[i]--; 229 } 230 } 231 232 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 233 /*-- permute the right-hand-side and solution vectors --*/ 234 ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 235 ierr = ISInvertPermutation(isrow,&isirow); CHKERRQ(ierr); 236 ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 237 for(i=0; i<n; i++) { 238 ordcol[i] = ic[iperm[i]-1]; 239 }; 240 ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 241 ierr = ISDestroy(isicol);CHKERRQ(ierr); 242 243 ierr = PetscFree(iperm);CHKERRQ(ierr); 244 245 ierr = ISCreateGeneral(PETSC_COMM_SELF, n, ordcol, &iscolf); 246 ierr = PetscFree(ordcol);CHKERRQ(ierr); 247 248 /*----- put together the new matrix -----*/ 249 250 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact); CHKERRQ(ierr); 251 (*fact)->factor = FACTOR_LU; 252 (*fact)->assembled = PETSC_TRUE; 253 254 b = (Mat_SeqAIJ *) (*fact)->data; 255 ierr = PetscFree(b->imax);CHKERRQ(ierr); 256 b->sorted = PETSC_FALSE; 257 b->singlemalloc = PETSC_FALSE; 258 /* the next line frees the default space generated by the MatCreate() */ 259 ierr = PetscFree(b->a);CHKERRQ(ierr); 260 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 261 b->a = new_a; 262 b->j = new_j; 263 b->i = new_i; 264 b->ilen = 0; 265 b->imax = 0; 266 /* I am not sure why these are the inverse are the row and column permutations */ 267 b->row = isirow; 268 b->col = iscolf; 269 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 270 b->maxnz = b->nz = new_i[n]; 271 b->indexshift = a->indexshift; 272 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 273 (*fact)->info.factor_mallocs = 0; 274 275 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 276 277 /* check out for identical nodes. If found, use inode functions */ 278 ierr = Mat_AIJ_CheckInode(*fact);CHKERRQ(ierr); 279 280 PetscFunctionReturn(0); 281 } 282 #else 283 #undef __FUNC__ 284 #define __FUNC__ "MatILUDTFactor_SeqAIJ" 285 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 286 { 287 PetscFunctionBegin; 288 SETERRQ(1,1,"You must install Saad's ILUDT to use this"); 289 } 290 #endif 291 292 /* 293 Factorization code for AIJ format. 294 */ 295 #undef __FUNC__ 296 #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ" 297 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B) 298 { 299 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 300 IS isicol; 301 int *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j; 302 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift; 303 int *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im; 304 305 PetscFunctionBegin; 306 PetscValidHeaderSpecific(isrow,IS_COOKIE); 307 PetscValidHeaderSpecific(iscol,IS_COOKIE); 308 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square"); 309 310 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 311 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 312 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 313 314 /* get new row pointers */ 315 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 316 ainew[0] = -shift; 317 /* don't know how many column pointers are needed so estimate */ 318 jmax = (int) (f*ai[n]+(!shift)); 319 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 320 /* fill is a linked list of nonzeros in active row */ 321 fill = (int *) PetscMalloc( (2*n+1)*sizeof(int));CHKPTRQ(fill); 322 im = fill + n + 1; 323 /* idnew is location of diagonal in factor */ 324 idnew = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(idnew); 325 idnew[0] = -shift; 326 327 for ( i=0; i<n; i++ ) { 328 /* first copy previous fill into linked list */ 329 nnz = nz = ai[r[i]+1] - ai[r[i]]; 330 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 331 ajtmp = aj + ai[r[i]] + shift; 332 fill[n] = n; 333 while (nz--) { 334 fm = n; 335 idx = ic[*ajtmp++ + shift]; 336 do { 337 m = fm; 338 fm = fill[m]; 339 } while (fm < idx); 340 fill[m] = idx; 341 fill[idx] = fm; 342 } 343 row = fill[n]; 344 while ( row < i ) { 345 ajtmp = ajnew + idnew[row] + (!shift); 346 nzbd = 1 + idnew[row] - ainew[row]; 347 nz = im[row] - nzbd; 348 fm = row; 349 while (nz-- > 0) { 350 idx = *ajtmp++ + shift; 351 nzbd++; 352 if (idx == i) im[row] = nzbd; 353 do { 354 m = fm; 355 fm = fill[m]; 356 } while (fm < idx); 357 if (fm != idx) { 358 fill[m] = idx; 359 fill[idx] = fm; 360 fm = idx; 361 nnz++; 362 } 363 } 364 row = fill[row]; 365 } 366 /* copy new filled row into permanent storage */ 367 ainew[i+1] = ainew[i] + nnz; 368 if (ainew[i+1] > jmax) { 369 370 /* estimate how much additional space we will need */ 371 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 372 /* just double the memory each time */ 373 int maxadd = jmax; 374 /* maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n); */ 375 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 376 jmax += maxadd; 377 378 /* allocate a longer ajnew */ 379 ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp); 380 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 381 ierr = PetscFree(ajnew);CHKERRQ(ierr); 382 ajnew = ajtmp; 383 realloc++; /* count how many times we realloc */ 384 } 385 ajtmp = ajnew + ainew[i] + shift; 386 fm = fill[n]; 387 nzi = 0; 388 im[i] = nnz; 389 while (nnz--) { 390 if (fm < i) nzi++; 391 *ajtmp++ = fm - shift; 392 fm = fill[fm]; 393 } 394 idnew[i] = ainew[i] + nzi; 395 } 396 if (ai[n] != 0) { 397 double af = ((double)ainew[n])/((double)ai[n]); 398 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 399 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 400 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 401 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 402 } else { 403 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 404 } 405 406 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 407 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 408 409 ierr = PetscFree(fill);CHKERRQ(ierr); 410 411 /* put together the new matrix */ 412 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 413 PLogObjectParent(*B,isicol); 414 b = (Mat_SeqAIJ *) (*B)->data; 415 ierr = PetscFree(b->imax);CHKERRQ(ierr); 416 b->singlemalloc = PETSC_FALSE; 417 /* the next line frees the default space generated by the Create() */ 418 ierr = PetscFree(b->a);CHKERRQ(ierr); 419 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 420 b->a = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a); 421 b->j = ajnew; 422 b->i = ainew; 423 b->diag = idnew; 424 b->ilen = 0; 425 b->imax = 0; 426 b->row = isrow; 427 b->col = iscol; 428 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 429 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 430 b->icol = isicol; 431 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 432 /* In b structure: Free imax, ilen, old a, old j. 433 Allocate idnew, solve_work, new a, new j */ 434 PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 435 b->maxnz = b->nz = ainew[n] + shift; 436 437 (*B)->factor = FACTOR_LU;; 438 (*B)->info.factor_mallocs = realloc; 439 (*B)->info.fill_ratio_given = f; 440 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant if A has inodes */ 441 442 if (ai[n] != 0) { 443 (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[n]); 444 } else { 445 (*B)->info.fill_ratio_needed = 0.0; 446 } 447 PetscFunctionReturn(0); 448 } 449 /* ----------------------------------------------------------- */ 450 extern int Mat_AIJ_CheckInode(Mat); 451 452 #undef __FUNC__ 453 #define __FUNC__ "MatLUFactorNumeric_SeqAIJ" 454 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 455 { 456 Mat C = *B; 457 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data; 458 IS isrow = b->row, isicol = b->icol; 459 int *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j; 460 int *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift; 461 int *diag_offset = b->diag,diag,k; 462 int preserve_row_sums = (int) a->ilu_preserve_row_sums; 463 register int *pj; 464 Scalar *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0; 465 double ssum; 466 register Scalar *pv, *rtmps,*u_values; 467 468 PetscFunctionBegin; 469 470 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 471 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 472 rtmp = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) );CHKPTRQ(rtmp); 473 ierr = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr); 474 rtmps = rtmp + shift; ics = ic + shift; 475 476 /* precalculate row sums */ 477 if (preserve_row_sums) { 478 rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) );CHKPTRQ(rowsums); 479 for ( i=0; i<n; i++ ) { 480 nz = a->i[r[i]+1] - a->i[r[i]]; 481 v = a->a + a->i[r[i]] + shift; 482 sum = 0.0; 483 for ( j=0; j<nz; j++ ) sum += v[j]; 484 rowsums[i] = sum; 485 } 486 } 487 488 for ( i=0; i<n; i++ ) { 489 nz = ai[i+1] - ai[i]; 490 ajtmp = aj + ai[i] + shift; 491 for ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0; 492 493 /* load in initial (unfactored row) */ 494 nz = a->i[r[i]+1] - a->i[r[i]]; 495 ajtmpold = a->j + a->i[r[i]] + shift; 496 v = a->a + a->i[r[i]] + shift; 497 for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] = v[j]; 498 499 row = *ajtmp++ + shift; 500 while (row < i ) { 501 pc = rtmp + row; 502 if (*pc != 0.0) { 503 pv = b->a + diag_offset[row] + shift; 504 pj = b->j + diag_offset[row] + (!shift); 505 multiplier = *pc / *pv++; 506 *pc = multiplier; 507 nz = ai[row+1] - diag_offset[row] - 1; 508 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 509 PLogFlops(2*nz); 510 } 511 row = *ajtmp++ + shift; 512 } 513 /* finished row so stick it into b->a */ 514 pv = b->a + ai[i] + shift; 515 pj = b->j + ai[i] + shift; 516 nz = ai[i+1] - ai[i]; 517 for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];} 518 diag = diag_offset[i] - ai[i]; 519 /* 520 Possibly adjust diagonal entry on current row to force 521 LU matrix to have same row sum as initial matrix. 522 */ 523 if (pv[diag] == 0.0) { 524 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i); 525 } 526 if (preserve_row_sums) { 527 pj = b->j + ai[i] + shift; 528 sum = rowsums[i]; 529 for ( j=0; j<diag; j++ ) { 530 u_values = b->a + diag_offset[pj[j]] + shift; 531 nz = ai[pj[j]+1] - diag_offset[pj[j]]; 532 inner_sum = 0.0; 533 for ( k=0; k<nz; k++ ) { 534 inner_sum += u_values[k]; 535 } 536 sum -= pv[j]*inner_sum; 537 538 } 539 nz = ai[i+1] - diag_offset[i] - 1; 540 u_values = b->a + diag_offset[i] + 1 + shift; 541 for ( k=0; k<nz; k++ ) { 542 sum -= u_values[k]; 543 } 544 ssum = PetscAbsScalar(sum/pv[diag]); 545 if (ssum < 1000. && ssum > .001) pv[diag] = sum; 546 } 547 /* check pivot entry for current row */ 548 } 549 550 /* invert diagonal entries for simplier triangular solves */ 551 for ( i=0; i<n; i++ ) { 552 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 553 } 554 555 if (preserve_row_sums) {ierr = PetscFree(rowsums);CHKERRQ(ierr);} 556 ierr = PetscFree(rtmp);CHKERRQ(ierr); 557 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 558 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 559 C->factor = FACTOR_LU; 560 ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr); 561 C->assembled = PETSC_TRUE; 562 PLogFlops(b->n); 563 PetscFunctionReturn(0); 564 } 565 /* ----------------------------------------------------------- */ 566 #undef __FUNC__ 567 #define __FUNC__ "MatLUFactor_SeqAIJ" 568 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f) 569 { 570 Mat_SeqAIJ *mat = (Mat_SeqAIJ *) A->data; 571 int ierr,refct; 572 Mat C; 573 PetscOps *Abops; 574 MatOps Aops; 575 576 PetscFunctionBegin; 577 ierr = MatLUFactorSymbolic(A,row,col,f,&C);CHKERRQ(ierr); 578 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 579 580 /* free all the data structures from mat */ 581 ierr = PetscFree(mat->a);CHKERRQ(ierr); 582 if (!mat->singlemalloc) { 583 ierr = PetscFree(mat->i);CHKERRQ(ierr); 584 ierr = PetscFree(mat->j);CHKERRQ(ierr); 585 } 586 if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);} 587 if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);} 588 if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);} 589 if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);} 590 if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);} 591 if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);} 592 ierr = PetscFree(mat);CHKERRQ(ierr); 593 594 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 595 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 596 597 /* 598 This is horrible, horrible code. We need to keep the 599 A pointers for the bops and ops but copy everything 600 else from C. 601 */ 602 Abops = A->bops; 603 Aops = A->ops; 604 refct = A->refct; 605 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 606 mat = (Mat_SeqAIJ *) A->data; 607 PLogObjectParent(A,mat->icol); 608 609 A->bops = Abops; 610 A->ops = Aops; 611 A->qlist = 0; 612 A->refct = refct; 613 /* copy over the type_name and name */ 614 ierr = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr); 615 ierr = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr); 616 617 PetscHeaderDestroy(C); 618 619 PetscFunctionReturn(0); 620 } 621 /* ----------------------------------------------------------- */ 622 #undef __FUNC__ 623 #define __FUNC__ "MatSolve_SeqAIJ" 624 int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx) 625 { 626 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 627 IS iscol = a->col, isrow = a->row; 628 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 629 int nz,shift = a->indexshift,*rout,*cout; 630 Scalar *x,*b,*tmp, *tmps, *aa = a->a, sum, *v; 631 632 PetscFunctionBegin; 633 if (!n) PetscFunctionReturn(0); 634 635 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 636 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 637 tmp = a->solve_work; 638 639 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 640 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 641 642 /* forward solve the lower triangular */ 643 tmp[0] = b[*r++]; 644 tmps = tmp + shift; 645 for ( i=1; i<n; i++ ) { 646 v = aa + ai[i] + shift; 647 vi = aj + ai[i] + shift; 648 nz = a->diag[i] - ai[i]; 649 sum = b[*r++]; 650 while (nz--) sum -= *v++ * tmps[*vi++]; 651 tmp[i] = sum; 652 } 653 654 /* backward solve the upper triangular */ 655 for ( i=n-1; i>=0; i-- ){ 656 v = aa + a->diag[i] + (!shift); 657 vi = aj + a->diag[i] + (!shift); 658 nz = ai[i+1] - a->diag[i] - 1; 659 sum = tmp[i]; 660 while (nz--) sum -= *v++ * tmps[*vi++]; 661 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 662 } 663 664 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 665 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 666 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 667 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 668 PLogFlops(2*a->nz - a->n); 669 PetscFunctionReturn(0); 670 } 671 672 /* ----------------------------------------------------------- */ 673 #undef __FUNC__ 674 #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering" 675 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb, Vec xx) 676 { 677 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 678 int n = a->m, *ai = a->i, *aj = a->j, *adiag = a->diag,ierr; 679 Scalar *x,*b, *aa = a->a, sum; 680 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 681 int adiag_i,i,*vi,nz,ai_i; 682 Scalar *v; 683 #endif 684 685 PetscFunctionBegin; 686 if (!n) PetscFunctionReturn(0); 687 if (a->indexshift) { 688 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 689 PetscFunctionReturn(0); 690 } 691 692 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 693 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 694 695 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 696 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 697 #else 698 /* forward solve the lower triangular */ 699 x[0] = b[0]; 700 for ( i=1; i<n; i++ ) { 701 ai_i = ai[i]; 702 v = aa + ai_i; 703 vi = aj + ai_i; 704 nz = adiag[i] - ai_i; 705 sum = b[i]; 706 while (nz--) sum -= *v++ * x[*vi++]; 707 x[i] = sum; 708 } 709 710 /* backward solve the upper triangular */ 711 for ( i=n-1; i>=0; i-- ){ 712 adiag_i = adiag[i]; 713 v = aa + adiag_i + 1; 714 vi = aj + adiag_i + 1; 715 nz = ai[i+1] - adiag_i - 1; 716 sum = x[i]; 717 while (nz--) sum -= *v++ * x[*vi++]; 718 x[i] = sum*aa[adiag_i]; 719 } 720 #endif 721 PLogFlops(2*a->nz - a->n); 722 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 723 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNC__ 728 #define __FUNC__ "MatSolveAdd_SeqAIJ" 729 int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx) 730 { 731 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 732 IS iscol = a->col, isrow = a->row; 733 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 734 int nz, shift = a->indexshift,*rout,*cout; 735 Scalar *x,*b,*tmp, *aa = a->a, sum, *v; 736 737 PetscFunctionBegin; 738 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 739 740 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 741 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 742 tmp = a->solve_work; 743 744 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 745 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 746 747 /* forward solve the lower triangular */ 748 tmp[0] = b[*r++]; 749 for ( i=1; i<n; i++ ) { 750 v = aa + ai[i] + shift; 751 vi = aj + ai[i] + shift; 752 nz = a->diag[i] - ai[i]; 753 sum = b[*r++]; 754 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 755 tmp[i] = sum; 756 } 757 758 /* backward solve the upper triangular */ 759 for ( i=n-1; i>=0; i-- ){ 760 v = aa + a->diag[i] + (!shift); 761 vi = aj + a->diag[i] + (!shift); 762 nz = ai[i+1] - a->diag[i] - 1; 763 sum = tmp[i]; 764 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 765 tmp[i] = sum*aa[a->diag[i]+shift]; 766 x[*c--] += tmp[i]; 767 } 768 769 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 770 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 771 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 772 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 773 PLogFlops(2*a->nz); 774 775 PetscFunctionReturn(0); 776 } 777 /* -------------------------------------------------------------------*/ 778 #undef __FUNC__ 779 #define __FUNC__ "MatSolveTranspose_SeqAIJ" 780 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb, Vec xx) 781 { 782 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 783 IS iscol = a->col, isrow = a->row; 784 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 785 int nz,shift = a->indexshift,*rout,*cout, *diag = a->diag; 786 Scalar *x,*b,*tmp, *aa = a->a, *v, s1; 787 788 PetscFunctionBegin; 789 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 790 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 791 tmp = a->solve_work; 792 793 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 794 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 795 796 /* copy the b into temp work space according to permutation */ 797 for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 798 799 /* forward solve the U^T */ 800 for ( i=0; i<n; i++ ) { 801 v = aa + diag[i] + shift; 802 vi = aj + diag[i] + (!shift); 803 nz = ai[i+1] - diag[i] - 1; 804 s1 = tmp[i]; 805 s1 *= *(v++); /* multiply by inverse of diagonal entry */ 806 while (nz--) { 807 tmp[*vi++ + shift] -= (*v++)*s1; 808 } 809 tmp[i] = s1; 810 } 811 812 /* backward solve the L^T */ 813 for ( i=n-1; i>=0; i-- ){ 814 v = aa + diag[i] - 1 + shift; 815 vi = aj + diag[i] - 1 + shift; 816 nz = diag[i] - ai[i]; 817 s1 = tmp[i]; 818 while (nz--) { 819 tmp[*vi-- + shift] -= (*v--)*s1; 820 } 821 } 822 823 /* copy tmp into x according to permutation */ 824 for ( i=0; i<n; i++ ) x[r[i]] = tmp[i]; 825 826 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 827 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 828 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 829 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 830 831 PLogFlops(2*a->nz-a->n); 832 PetscFunctionReturn(0); 833 } 834 835 #undef __FUNC__ 836 #define __FUNC__ "MatSolveTransposeAdd_SeqAIJ" 837 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx) 838 { 839 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 840 IS iscol = a->col, isrow = a->row; 841 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 842 int nz,shift = a->indexshift, *rout, *cout, *diag = a->diag; 843 Scalar *x,*b,*tmp, *aa = a->a, *v; 844 845 PetscFunctionBegin; 846 if (zz != xx) VecCopy(zz,xx); 847 848 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 849 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 850 tmp = a->solve_work; 851 852 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 853 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 854 855 /* copy the b into temp work space according to permutation */ 856 for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 857 858 /* forward solve the U^T */ 859 for ( i=0; i<n; i++ ) { 860 v = aa + diag[i] + shift; 861 vi = aj + diag[i] + (!shift); 862 nz = ai[i+1] - diag[i] - 1; 863 tmp[i] *= *v++; 864 while (nz--) { 865 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 866 } 867 } 868 869 /* backward solve the L^T */ 870 for ( i=n-1; i>=0; i-- ){ 871 v = aa + diag[i] - 1 + shift; 872 vi = aj + diag[i] - 1 + shift; 873 nz = diag[i] - ai[i]; 874 while (nz--) { 875 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 876 } 877 } 878 879 /* copy tmp into x according to permutation */ 880 for ( i=0; i<n; i++ ) x[r[i]] += tmp[i]; 881 882 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 883 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 884 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 885 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 886 887 PLogFlops(2*a->nz); 888 PetscFunctionReturn(0); 889 } 890 /* ----------------------------------------------------------------*/ 891 extern int MatMissingDiagonal_SeqAIJ(Mat); 892 893 #undef __FUNC__ 894 #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ" 895 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 896 { 897 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 898 IS isicol; 899 int *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j; 900 int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 901 int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0, dcount = 0; 902 int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 903 PetscTruth col_identity, row_identity; 904 double f; 905 906 PetscFunctionBegin; 907 if (info) { 908 f = info->fill; 909 levels = (int) info->levels; 910 diagonal_fill = (int) info->diagonal_fill; 911 } else { 912 f = 1.0; 913 levels = 0; 914 diagonal_fill = 0; 915 } 916 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 917 918 /* special case that simply copies fill pattern */ 919 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 920 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 921 if (!levels && row_identity && col_identity) { 922 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 923 (*fact)->factor = FACTOR_LU; 924 b = (Mat_SeqAIJ *) (*fact)->data; 925 if (!b->diag) { 926 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 927 } 928 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 929 b->row = isrow; 930 b->col = iscol; 931 b->icol = isicol; 932 b->solve_work = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 933 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 934 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 935 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 936 PetscFunctionReturn(0); 937 } 938 939 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 940 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 941 942 /* get new row pointers */ 943 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 944 ainew[0] = -shift; 945 /* don't know how many column pointers are needed so estimate */ 946 jmax = (int) (f*(ai[n]+!shift)); 947 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 948 /* ajfill is level of fill for each fill entry */ 949 ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill); 950 /* fill is a linked list of nonzeros in active row */ 951 fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill); 952 /* im is level for each filled value */ 953 im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im); 954 /* dloc is location of diagonal in factor */ 955 dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc); 956 dloc[0] = 0; 957 for ( prow=0; prow<n; prow++ ) { 958 959 /* copy current row into linked list */ 960 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 961 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 962 xi = aj + ai[r[prow]] + shift; 963 fill[n] = n; 964 fill[prow] = -1; /* marker to indicate if diagonal exists */ 965 while (nz--) { 966 fm = n; 967 idx = ic[*xi++ + shift]; 968 do { 969 m = fm; 970 fm = fill[m]; 971 } while (fm < idx); 972 fill[m] = idx; 973 fill[idx] = fm; 974 im[idx] = 0; 975 } 976 977 /* make sure diagonal entry is included */ 978 if (diagonal_fill && fill[prow] == -1) { 979 fm = n; 980 while (fill[fm] < prow) fm = fill[fm]; 981 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 982 fill[fm] = prow; 983 im[prow] = 0; 984 nzf++; 985 dcount++; 986 } 987 988 nzi = 0; 989 row = fill[n]; 990 while ( row < prow ) { 991 incrlev = im[row] + 1; 992 nz = dloc[row]; 993 xi = ajnew + ainew[row] + shift + nz + 1; 994 flev = ajfill + ainew[row] + shift + nz + 1; 995 nnz = ainew[row+1] - ainew[row] - nz - 1; 996 fm = row; 997 while (nnz-- > 0) { 998 idx = *xi++ + shift; 999 if (*flev + incrlev > levels) { 1000 flev++; 1001 continue; 1002 } 1003 do { 1004 m = fm; 1005 fm = fill[m]; 1006 } while (fm < idx); 1007 if (fm != idx) { 1008 im[idx] = *flev + incrlev; 1009 fill[m] = idx; 1010 fill[idx] = fm; 1011 fm = idx; 1012 nzf++; 1013 } else { 1014 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 1015 } 1016 flev++; 1017 } 1018 row = fill[row]; 1019 nzi++; 1020 } 1021 /* copy new filled row into permanent storage */ 1022 ainew[prow+1] = ainew[prow] + nzf; 1023 if (ainew[prow+1] > jmax-shift) { 1024 1025 /* estimate how much additional space we will need */ 1026 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1027 /* just double the memory each time */ 1028 /* maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1029 int maxadd = jmax; 1030 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1031 jmax += maxadd; 1032 1033 /* allocate a longer ajnew and ajfill */ 1034 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1035 ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1036 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1037 ajnew = xi; 1038 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1039 ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1040 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1041 ajfill = xi; 1042 realloc++; /* count how many times we realloc */ 1043 } 1044 xi = ajnew + ainew[prow] + shift; 1045 flev = ajfill + ainew[prow] + shift; 1046 dloc[prow] = nzi; 1047 fm = fill[n]; 1048 while (nzf--) { 1049 *xi++ = fm - shift; 1050 *flev++ = im[fm]; 1051 fm = fill[fm]; 1052 } 1053 /* make sure row has diagonal entry */ 1054 if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 1055 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 1056 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1057 } 1058 } 1059 ierr = PetscFree(ajfill); CHKERRQ(ierr); 1060 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1061 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1062 ierr = PetscFree(fill);CHKERRQ(ierr); 1063 ierr = PetscFree(im);CHKERRQ(ierr); 1064 1065 { 1066 double af = ((double)ainew[n])/((double)ai[n]); 1067 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1068 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 1069 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 1070 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1071 if (diagonal_fill) { 1072 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount); 1073 } 1074 } 1075 1076 /* put together the new matrix */ 1077 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1078 PLogObjectParent(*fact,isicol); 1079 b = (Mat_SeqAIJ *) (*fact)->data; 1080 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1081 b->singlemalloc = PETSC_FALSE; 1082 len = (ainew[n] + shift)*sizeof(Scalar); 1083 /* the next line frees the default space generated by the Create() */ 1084 ierr = PetscFree(b->a);CHKERRQ(ierr); 1085 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1086 b->a = (Scalar *) PetscMalloc( len+1 );CHKPTRQ(b->a); 1087 b->j = ajnew; 1088 b->i = ainew; 1089 for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 1090 b->diag = dloc; 1091 b->ilen = 0; 1092 b->imax = 0; 1093 b->row = isrow; 1094 b->col = iscol; 1095 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1096 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1097 b->icol = isicol; 1098 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1099 /* In b structure: Free imax, ilen, old a, old j. 1100 Allocate dloc, solve_work, new a, new j */ 1101 PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 1102 b->maxnz = b->nz = ainew[n] + shift; 1103 (*fact)->factor = FACTOR_LU; 1104 1105 (*fact)->info.factor_mallocs = realloc; 1106 (*fact)->info.fill_ratio_given = f; 1107 (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]); 1108 (*fact)->factor = FACTOR_LU;; 1109 1110 PetscFunctionReturn(0); 1111 } 1112 1113 1114 1115 1116