1 /*$Id: aijfact.c,v 1.133 1999/12/16 23:25:12 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 7 #undef __FUNC__ 8 #define __FUNC__ "MatOrdering_Flow_SeqAIJ" 9 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 10 { 11 PetscFunctionBegin; 12 13 SETERRQ(PETSC_ERR_SUP,0,"Code not written"); 14 #if !defined(PETSC_USE_DEBUG) 15 PetscFunctionReturn(0); 16 #endif 17 } 18 19 20 extern int MatMarkDiagonal_SeqAIJ(Mat); 21 extern int Mat_AIJ_CheckInode(Mat); 22 23 #if !defined(PETSC_USE_COMPLEX) && defined(PETSC_HAVE_SAADILUDT) 24 25 #if defined(PETSC_HAVE_FORTRAN_CAPS) 26 #define dperm_ DPERM 27 #define ilutp_ ILUTP 28 #define ilu0_ ILU0 29 #define msrcsr_ MSRCSR 30 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 31 #define dperm_ dperm 32 #define ilutp_ ilutp 33 #define ilu0_ ilu0 34 #define msrcsr_ msrcsr 35 #endif 36 37 EXTERN_C_BEGIN 38 extern void dperm_(int*,double*,int*,int*,double*,int*,int*,int*,int*,int*); 39 extern void ilutp_(int*,double*,int*,int*,int*,double*,double*,int*,double*,int*,int*,int*,double*,int*,int*,int*); 40 extern void msrcsr_(int*,double*,int*,double*,int*,int*,double*,int*); 41 extern void ilu0_(int*,double*,int*,int*,double*,int*,int*,int*,int *); 42 EXTERN_C_END 43 44 #undef __FUNC__ 45 #define __FUNC__ "MatILUDTFactor_SeqAIJ" 46 /* ------------------------------------------------------------ 47 48 This interface was contribed by Tony Caola 49 50 This routine is an interface to the pivoting drop-tolerance 51 ILU routine written by Yousef Saad (saad@cs.umn.edu). It 52 was inspired by the non-pivoting iludt written by David 53 Hysom (hysom@cs.odu.edu). 54 55 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 56 help in getting this routine ironed out. 57 58 As of right now, there are a couple of things that could 59 be, uh, better. 60 61 1 - Since Saad's routine is Fortran based, memory cannot be 62 malloc'd. I was trying to get the expected fill from the 63 preconditioner and use this number as the multiplier in 64 the equation for jmax, below, but couldn't figure it out. 65 Anyway, perhaps a better solution is to run SPARSKIT through 66 f2c and incorporate mallocs(), but I want to graduate so I'll 67 just rebuild Petsc. . . 68 69 shift = 1, ishift = 0, for indices start at 1 70 shift = 0, ishift = 1, for indices starting at 0 71 ------------------------------------------------------------ 72 */ 73 74 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 75 { 76 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 77 IS iscolf, isicol, isirow; 78 PetscTruth reorder; 79 int *c,*r,*ic,*ir, ierr, i, n = a->m; 80 int *old_i = a->i, *old_j = a->j, *new_i, *old_i2, *old_j2,*new_j; 81 int *ordcol, *iwk,*iperm, *jw; 82 int ishift = !a->indexshift,shift = -a->indexshift; 83 int jmax,lfill,job,*o_i,*o_j; 84 Scalar *old_a = a->a, *w, *new_a, *old_a2, *wk,permtol=0.0,*o_a; 85 86 PetscFunctionBegin; 87 88 if (info->dt == PETSC_DEFAULT) info->dt = .005; 89 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int) (1.5*a->rmax); 90 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 91 if (info->fill == PETSC_DEFAULT) info->fill = (n*info->dtcount)/a->nz; 92 lfill = info->dtcount/2; 93 jmax = info->fill*a->nz; 94 permtol = info->dtcol; 95 96 ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 97 ierr = ISInvertPermutation(isrow,&isirow); CHKERRQ(ierr); 98 99 100 /* ------------------------------------------------------------ 101 If reorder=.TRUE., then the original matrix has to be 102 reordered to reflect the user selected ordering scheme, and 103 then de-reordered so it is in it's original format. 104 Because Saad's dperm() is NOT in place, we have to copy 105 the original matrix and allocate more storage. . . 106 ------------------------------------------------------------ 107 */ 108 109 /* set reorder to true if either isrow or iscol is not identity */ 110 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 111 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 112 reorder = PetscNot(reorder); 113 114 115 /* storage for ilu factor */ 116 new_i = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(new_i); 117 new_j = (int *) PetscMalloc(jmax*sizeof(int)); CHKPTRQ(new_j); 118 new_a = (Scalar *) PetscMalloc(jmax*sizeof(Scalar)); CHKPTRQ(new_a); 119 120 ordcol = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordcol); 121 122 /* ------------------------------------------------------------ 123 Make sure that everything is Fortran formatted (1-Based) 124 ------------------------------------------------------------ 125 */ 126 if (ishift) { 127 for (i=old_i[0];i<old_i[n];i++) { 128 old_j[i]++; 129 } 130 for(i=0;i<n+1;i++) { 131 old_i[i]++; 132 }; 133 }; 134 135 if (reorder) { 136 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 137 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 138 for(i=0;i<n;i++) { 139 r[i] = r[i]+1; 140 c[i] = c[i]+1; 141 } 142 old_i2 = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(old_i2); 143 old_j2 = (int *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int)); CHKPTRQ(old_j2); 144 old_a2 = (Scalar *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar));CHKPTRQ(old_a2); 145 job = 3; dperm_(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 146 for (i=0;i<n;i++) { 147 r[i] = r[i]-1; 148 c[i] = c[i]-1; 149 } 150 ierr = ISRestoreIndices(iscol,&c); CHKERRQ(ierr); 151 ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 152 o_a = old_a2; 153 o_j = old_j2; 154 o_i = old_i2; 155 } else { 156 o_a = old_a; 157 o_j = old_j; 158 o_i = old_i; 159 } 160 161 /* ------------------------------------------------------------ 162 Call Saad's ilutp() routine to generate the factorization 163 ------------------------------------------------------------ 164 */ 165 166 iperm = (int *) PetscMalloc(2*n*sizeof(int)); CHKPTRQ(iperm); 167 jw = (int *) PetscMalloc(2*n*sizeof(int)); CHKPTRQ(jw); 168 w = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(w); 169 170 ilutp_(&n,o_a,o_j,o_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 171 if (ierr) { 172 switch (ierr) { 173 case -3: SETERRQ1(1,1,"ilutp(), matrix U overflows, need larger info->fill value %d",jmax); 174 break; 175 case -2: SETERRQ1(1,1,"ilutp(), matrix L overflows, need larger info->fill value %d",jmax); 176 break; 177 case -5: SETERRQ(1,1,"ilutp(), zero row encountered"); 178 break; 179 case -1: SETERRQ(1,1,"ilutp(), input matrix may be wrong"); 180 break; 181 case -4: SETERRQ1(1,1,"ilutp(), illegal info->fill value %d",jmax); 182 break; 183 default: SETERRQ1(1,1,"ilutp(), zero pivot detected on row %d",ierr); 184 } 185 } 186 187 ierr = PetscFree(w);CHKERRQ(ierr); 188 ierr = PetscFree(jw);CHKERRQ(ierr); 189 190 /* ------------------------------------------------------------ 191 Saad's routine gives the result in Modified Sparse Row (msr) 192 Convert to Compressed Sparse Row format (csr) 193 ------------------------------------------------------------ 194 */ 195 196 wk = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(wk); 197 iwk = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(iwk); 198 199 msrcsr_(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 200 201 ierr = PetscFree(iwk);CHKERRQ(ierr); 202 ierr = PetscFree(wk);CHKERRQ(ierr); 203 204 if (reorder) { 205 ierr = PetscFree(old_a2);CHKERRQ(ierr); 206 ierr = PetscFree(old_j2);CHKERRQ(ierr); 207 ierr = PetscFree(old_i2);CHKERRQ(ierr); 208 } else { 209 /* fix permutation of old_j that the factorization introduced */ 210 for (i=old_i[0]; i<=old_i[n]; i++) { 211 old_j[i-1] = iperm[old_j[i-1]-1]; 212 } 213 } 214 215 /* get rid of the shift to indices starting at 1 */ 216 if (ishift) { 217 for (i=0; i<n+1; i++) { 218 old_i[i]--; 219 } 220 for (i=old_i[0];i<old_i[n];i++) { 221 old_j[i]--; 222 } 223 } 224 225 /* Make the factored matrix 0-based */ 226 if (ishift) { 227 for (i=0; i<n+1; i++) { 228 new_i[i]--; 229 } 230 for (i=new_i[0];i<new_i[n];i++) { 231 new_j[i]--; 232 } 233 } 234 235 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 236 /*-- permute the right-hand-side and solution vectors --*/ 237 ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 238 for(i=0; i<n; i++) { 239 ordcol[i] = ic[iperm[i]-1]; 240 }; 241 ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 242 ierr = ISDestroy(isicol);CHKERRQ(ierr); 243 244 ierr = PetscFree(iperm);CHKERRQ(ierr); 245 246 ierr = ISCreateGeneral(PETSC_COMM_SELF, n, ordcol, &iscolf); 247 ierr = PetscFree(ordcol);CHKERRQ(ierr); 248 249 /*----- put together the new matrix -----*/ 250 251 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact); CHKERRQ(ierr); 252 (*fact)->factor = FACTOR_LU; 253 (*fact)->assembled = PETSC_TRUE; 254 255 b = (Mat_SeqAIJ *) (*fact)->data; 256 ierr = PetscFree(b->imax);CHKERRQ(ierr); 257 b->sorted = PETSC_FALSE; 258 b->singlemalloc = PETSC_FALSE; 259 /* the next line frees the default space generated by the MatCreate() */ 260 ierr = PetscFree(b->a);CHKERRQ(ierr); 261 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 262 b->a = new_a; 263 b->j = new_j; 264 b->i = new_i; 265 b->ilen = 0; 266 b->imax = 0; 267 /* I am not sure why these are the inverse are the row and column permutations */ 268 b->row = isirow; 269 b->col = iscolf; 270 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 271 b->maxnz = b->nz = new_i[n]; 272 b->indexshift = a->indexshift; 273 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 274 (*fact)->info.factor_mallocs = 0; 275 276 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 277 278 /* check out for identical nodes. If found, use inode functions */ 279 ierr = Mat_AIJ_CheckInode(*fact);CHKERRQ(ierr); 280 281 PetscFunctionReturn(0); 282 } 283 #else 284 #undef __FUNC__ 285 #define __FUNC__ "MatILUDTFactor_SeqAIJ" 286 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 287 { 288 PetscFunctionBegin; 289 SETERRQ(1,1,"You must install Saad's ILUDT to use this"); 290 } 291 #endif 292 293 /* 294 Factorization code for AIJ format. 295 */ 296 #undef __FUNC__ 297 #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ" 298 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B) 299 { 300 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 301 IS isicol; 302 int *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j; 303 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift; 304 int *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im; 305 306 PetscFunctionBegin; 307 PetscValidHeaderSpecific(isrow,IS_COOKIE); 308 PetscValidHeaderSpecific(iscol,IS_COOKIE); 309 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square"); 310 311 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 312 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 313 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 314 315 /* get new row pointers */ 316 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 317 ainew[0] = -shift; 318 /* don't know how many column pointers are needed so estimate */ 319 jmax = (int) (f*ai[n]+(!shift)); 320 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 321 /* fill is a linked list of nonzeros in active row */ 322 fill = (int *) PetscMalloc( (2*n+1)*sizeof(int));CHKPTRQ(fill); 323 im = fill + n + 1; 324 /* idnew is location of diagonal in factor */ 325 idnew = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(idnew); 326 idnew[0] = -shift; 327 328 for ( i=0; i<n; i++ ) { 329 /* first copy previous fill into linked list */ 330 nnz = nz = ai[r[i]+1] - ai[r[i]]; 331 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 332 ajtmp = aj + ai[r[i]] + shift; 333 fill[n] = n; 334 while (nz--) { 335 fm = n; 336 idx = ic[*ajtmp++ + shift]; 337 do { 338 m = fm; 339 fm = fill[m]; 340 } while (fm < idx); 341 fill[m] = idx; 342 fill[idx] = fm; 343 } 344 row = fill[n]; 345 while ( row < i ) { 346 ajtmp = ajnew + idnew[row] + (!shift); 347 nzbd = 1 + idnew[row] - ainew[row]; 348 nz = im[row] - nzbd; 349 fm = row; 350 while (nz-- > 0) { 351 idx = *ajtmp++ + shift; 352 nzbd++; 353 if (idx == i) im[row] = nzbd; 354 do { 355 m = fm; 356 fm = fill[m]; 357 } while (fm < idx); 358 if (fm != idx) { 359 fill[m] = idx; 360 fill[idx] = fm; 361 fm = idx; 362 nnz++; 363 } 364 } 365 row = fill[row]; 366 } 367 /* copy new filled row into permanent storage */ 368 ainew[i+1] = ainew[i] + nnz; 369 if (ainew[i+1] > jmax) { 370 371 /* estimate how much additional space we will need */ 372 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 373 /* just double the memory each time */ 374 int maxadd = jmax; 375 /* maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n); */ 376 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 377 jmax += maxadd; 378 379 /* allocate a longer ajnew */ 380 ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp); 381 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 382 ierr = PetscFree(ajnew);CHKERRQ(ierr); 383 ajnew = ajtmp; 384 realloc++; /* count how many times we realloc */ 385 } 386 ajtmp = ajnew + ainew[i] + shift; 387 fm = fill[n]; 388 nzi = 0; 389 im[i] = nnz; 390 while (nnz--) { 391 if (fm < i) nzi++; 392 *ajtmp++ = fm - shift; 393 fm = fill[fm]; 394 } 395 idnew[i] = ainew[i] + nzi; 396 } 397 if (ai[n] != 0) { 398 double af = ((double)ainew[n])/((double)ai[n]); 399 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 400 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 401 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 402 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 403 } else { 404 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 405 } 406 407 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 408 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 409 410 ierr = PetscFree(fill);CHKERRQ(ierr); 411 412 /* put together the new matrix */ 413 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 414 PLogObjectParent(*B,isicol); 415 b = (Mat_SeqAIJ *) (*B)->data; 416 ierr = PetscFree(b->imax);CHKERRQ(ierr); 417 b->singlemalloc = PETSC_FALSE; 418 /* the next line frees the default space generated by the Create() */ 419 ierr = PetscFree(b->a);CHKERRQ(ierr); 420 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 421 b->a = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a); 422 b->j = ajnew; 423 b->i = ainew; 424 b->diag = idnew; 425 b->ilen = 0; 426 b->imax = 0; 427 b->row = isrow; 428 b->col = iscol; 429 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 430 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 431 b->icol = isicol; 432 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 433 /* In b structure: Free imax, ilen, old a, old j. 434 Allocate idnew, solve_work, new a, new j */ 435 PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 436 b->maxnz = b->nz = ainew[n] + shift; 437 438 (*B)->factor = FACTOR_LU;; 439 (*B)->info.factor_mallocs = realloc; 440 (*B)->info.fill_ratio_given = f; 441 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant if A has inodes */ 442 443 if (ai[n] != 0) { 444 (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[n]); 445 } else { 446 (*B)->info.fill_ratio_needed = 0.0; 447 } 448 PetscFunctionReturn(0); 449 } 450 /* ----------------------------------------------------------- */ 451 extern int Mat_AIJ_CheckInode(Mat); 452 453 #undef __FUNC__ 454 #define __FUNC__ "MatLUFactorNumeric_SeqAIJ" 455 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 456 { 457 Mat C = *B; 458 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data; 459 IS isrow = b->row, isicol = b->icol; 460 int *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j; 461 int *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift; 462 int *diag_offset = b->diag,diag,k; 463 int preserve_row_sums = (int) a->ilu_preserve_row_sums; 464 register int *pj; 465 Scalar *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0; 466 double ssum; 467 register Scalar *pv, *rtmps,*u_values; 468 469 PetscFunctionBegin; 470 471 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 472 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 473 rtmp = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) );CHKPTRQ(rtmp); 474 ierr = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr); 475 rtmps = rtmp + shift; ics = ic + shift; 476 477 /* precalculate row sums */ 478 if (preserve_row_sums) { 479 rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) );CHKPTRQ(rowsums); 480 for ( i=0; i<n; i++ ) { 481 nz = a->i[r[i]+1] - a->i[r[i]]; 482 v = a->a + a->i[r[i]] + shift; 483 sum = 0.0; 484 for ( j=0; j<nz; j++ ) sum += v[j]; 485 rowsums[i] = sum; 486 } 487 } 488 489 for ( i=0; i<n; i++ ) { 490 nz = ai[i+1] - ai[i]; 491 ajtmp = aj + ai[i] + shift; 492 for ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0; 493 494 /* load in initial (unfactored row) */ 495 nz = a->i[r[i]+1] - a->i[r[i]]; 496 ajtmpold = a->j + a->i[r[i]] + shift; 497 v = a->a + a->i[r[i]] + shift; 498 for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] = v[j]; 499 500 row = *ajtmp++ + shift; 501 while (row < i ) { 502 pc = rtmp + row; 503 if (*pc != 0.0) { 504 pv = b->a + diag_offset[row] + shift; 505 pj = b->j + diag_offset[row] + (!shift); 506 multiplier = *pc / *pv++; 507 *pc = multiplier; 508 nz = ai[row+1] - diag_offset[row] - 1; 509 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 510 PLogFlops(2*nz); 511 } 512 row = *ajtmp++ + shift; 513 } 514 /* finished row so stick it into b->a */ 515 pv = b->a + ai[i] + shift; 516 pj = b->j + ai[i] + shift; 517 nz = ai[i+1] - ai[i]; 518 for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];} 519 diag = diag_offset[i] - ai[i]; 520 /* 521 Possibly adjust diagonal entry on current row to force 522 LU matrix to have same row sum as initial matrix. 523 */ 524 if (pv[diag] == 0.0) { 525 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i); 526 } 527 if (preserve_row_sums) { 528 pj = b->j + ai[i] + shift; 529 sum = rowsums[i]; 530 for ( j=0; j<diag; j++ ) { 531 u_values = b->a + diag_offset[pj[j]] + shift; 532 nz = ai[pj[j]+1] - diag_offset[pj[j]]; 533 inner_sum = 0.0; 534 for ( k=0; k<nz; k++ ) { 535 inner_sum += u_values[k]; 536 } 537 sum -= pv[j]*inner_sum; 538 539 } 540 nz = ai[i+1] - diag_offset[i] - 1; 541 u_values = b->a + diag_offset[i] + 1 + shift; 542 for ( k=0; k<nz; k++ ) { 543 sum -= u_values[k]; 544 } 545 ssum = PetscAbsScalar(sum/pv[diag]); 546 if (ssum < 1000. && ssum > .001) pv[diag] = sum; 547 } 548 /* check pivot entry for current row */ 549 } 550 551 /* invert diagonal entries for simplier triangular solves */ 552 for ( i=0; i<n; i++ ) { 553 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 554 } 555 556 if (preserve_row_sums) {ierr = PetscFree(rowsums);CHKERRQ(ierr);} 557 ierr = PetscFree(rtmp);CHKERRQ(ierr); 558 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 559 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 560 C->factor = FACTOR_LU; 561 ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr); 562 C->assembled = PETSC_TRUE; 563 PLogFlops(b->n); 564 PetscFunctionReturn(0); 565 } 566 /* ----------------------------------------------------------- */ 567 #undef __FUNC__ 568 #define __FUNC__ "MatLUFactor_SeqAIJ" 569 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f) 570 { 571 Mat_SeqAIJ *mat = (Mat_SeqAIJ *) A->data; 572 int ierr,refct; 573 Mat C; 574 PetscOps *Abops; 575 MatOps Aops; 576 577 PetscFunctionBegin; 578 ierr = MatLUFactorSymbolic(A,row,col,f,&C);CHKERRQ(ierr); 579 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 580 581 /* free all the data structures from mat */ 582 ierr = PetscFree(mat->a);CHKERRQ(ierr); 583 if (!mat->singlemalloc) { 584 ierr = PetscFree(mat->i);CHKERRQ(ierr); 585 ierr = PetscFree(mat->j);CHKERRQ(ierr); 586 } 587 if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);} 588 if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);} 589 if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);} 590 if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);} 591 if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);} 592 if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);} 593 ierr = PetscFree(mat);CHKERRQ(ierr); 594 595 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 596 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 597 598 /* 599 This is horrible, horrible code. We need to keep the 600 A pointers for the bops and ops but copy everything 601 else from C. 602 */ 603 Abops = A->bops; 604 Aops = A->ops; 605 refct = A->refct; 606 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 607 mat = (Mat_SeqAIJ *) A->data; 608 PLogObjectParent(A,mat->icol); 609 610 A->bops = Abops; 611 A->ops = Aops; 612 A->qlist = 0; 613 A->refct = refct; 614 /* copy over the type_name and name */ 615 ierr = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr); 616 ierr = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr); 617 618 PetscHeaderDestroy(C); 619 620 PetscFunctionReturn(0); 621 } 622 /* ----------------------------------------------------------- */ 623 #undef __FUNC__ 624 #define __FUNC__ "MatSolve_SeqAIJ" 625 int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx) 626 { 627 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 628 IS iscol = a->col, isrow = a->row; 629 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 630 int nz,shift = a->indexshift,*rout,*cout; 631 Scalar *x,*b,*tmp, *tmps, *aa = a->a, sum, *v; 632 633 PetscFunctionBegin; 634 if (!n) PetscFunctionReturn(0); 635 636 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 637 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 638 tmp = a->solve_work; 639 640 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 641 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 642 643 /* forward solve the lower triangular */ 644 tmp[0] = b[*r++]; 645 tmps = tmp + shift; 646 for ( i=1; i<n; i++ ) { 647 v = aa + ai[i] + shift; 648 vi = aj + ai[i] + shift; 649 nz = a->diag[i] - ai[i]; 650 sum = b[*r++]; 651 while (nz--) sum -= *v++ * tmps[*vi++]; 652 tmp[i] = sum; 653 } 654 655 /* backward solve the upper triangular */ 656 for ( i=n-1; i>=0; i-- ){ 657 v = aa + a->diag[i] + (!shift); 658 vi = aj + a->diag[i] + (!shift); 659 nz = ai[i+1] - a->diag[i] - 1; 660 sum = tmp[i]; 661 while (nz--) sum -= *v++ * tmps[*vi++]; 662 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 663 } 664 665 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 666 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 667 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 668 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 669 PLogFlops(2*a->nz - a->n); 670 PetscFunctionReturn(0); 671 } 672 673 /* ----------------------------------------------------------- */ 674 #undef __FUNC__ 675 #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering" 676 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb, Vec xx) 677 { 678 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 679 int n = a->m, *ai = a->i, *aj = a->j, *adiag = a->diag,ierr; 680 Scalar *x,*b, *aa = a->a, sum; 681 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 682 int adiag_i,i,*vi,nz,ai_i; 683 Scalar *v; 684 #endif 685 686 PetscFunctionBegin; 687 if (!n) PetscFunctionReturn(0); 688 if (a->indexshift) { 689 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 690 PetscFunctionReturn(0); 691 } 692 693 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 694 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 695 696 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 697 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 698 #else 699 /* forward solve the lower triangular */ 700 x[0] = b[0]; 701 for ( i=1; i<n; i++ ) { 702 ai_i = ai[i]; 703 v = aa + ai_i; 704 vi = aj + ai_i; 705 nz = adiag[i] - ai_i; 706 sum = b[i]; 707 while (nz--) sum -= *v++ * x[*vi++]; 708 x[i] = sum; 709 } 710 711 /* backward solve the upper triangular */ 712 for ( i=n-1; i>=0; i-- ){ 713 adiag_i = adiag[i]; 714 v = aa + adiag_i + 1; 715 vi = aj + adiag_i + 1; 716 nz = ai[i+1] - adiag_i - 1; 717 sum = x[i]; 718 while (nz--) sum -= *v++ * x[*vi++]; 719 x[i] = sum*aa[adiag_i]; 720 } 721 #endif 722 PLogFlops(2*a->nz - a->n); 723 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 724 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 725 PetscFunctionReturn(0); 726 } 727 728 #undef __FUNC__ 729 #define __FUNC__ "MatSolveAdd_SeqAIJ" 730 int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx) 731 { 732 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 733 IS iscol = a->col, isrow = a->row; 734 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 735 int nz, shift = a->indexshift,*rout,*cout; 736 Scalar *x,*b,*tmp, *aa = a->a, sum, *v; 737 738 PetscFunctionBegin; 739 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 740 741 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 742 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 743 tmp = a->solve_work; 744 745 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 746 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 747 748 /* forward solve the lower triangular */ 749 tmp[0] = b[*r++]; 750 for ( i=1; i<n; i++ ) { 751 v = aa + ai[i] + shift; 752 vi = aj + ai[i] + shift; 753 nz = a->diag[i] - ai[i]; 754 sum = b[*r++]; 755 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 756 tmp[i] = sum; 757 } 758 759 /* backward solve the upper triangular */ 760 for ( i=n-1; i>=0; i-- ){ 761 v = aa + a->diag[i] + (!shift); 762 vi = aj + a->diag[i] + (!shift); 763 nz = ai[i+1] - a->diag[i] - 1; 764 sum = tmp[i]; 765 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 766 tmp[i] = sum*aa[a->diag[i]+shift]; 767 x[*c--] += tmp[i]; 768 } 769 770 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 771 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 772 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 773 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 774 PLogFlops(2*a->nz); 775 776 PetscFunctionReturn(0); 777 } 778 /* -------------------------------------------------------------------*/ 779 #undef __FUNC__ 780 #define __FUNC__ "MatSolveTranspose_SeqAIJ" 781 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb, Vec xx) 782 { 783 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 784 IS iscol = a->col, isrow = a->row; 785 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 786 int nz,shift = a->indexshift,*rout,*cout, *diag = a->diag; 787 Scalar *x,*b,*tmp, *aa = a->a, *v, s1; 788 789 PetscFunctionBegin; 790 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 791 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 792 tmp = a->solve_work; 793 794 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 795 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 796 797 /* copy the b into temp work space according to permutation */ 798 for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 799 800 /* forward solve the U^T */ 801 for ( i=0; i<n; i++ ) { 802 v = aa + diag[i] + shift; 803 vi = aj + diag[i] + (!shift); 804 nz = ai[i+1] - diag[i] - 1; 805 s1 = tmp[i]; 806 s1 *= *(v++); /* multiply by inverse of diagonal entry */ 807 while (nz--) { 808 tmp[*vi++ + shift] -= (*v++)*s1; 809 } 810 tmp[i] = s1; 811 } 812 813 /* backward solve the L^T */ 814 for ( i=n-1; i>=0; i-- ){ 815 v = aa + diag[i] - 1 + shift; 816 vi = aj + diag[i] - 1 + shift; 817 nz = diag[i] - ai[i]; 818 s1 = tmp[i]; 819 while (nz--) { 820 tmp[*vi-- + shift] -= (*v--)*s1; 821 } 822 } 823 824 /* copy tmp into x according to permutation */ 825 for ( i=0; i<n; i++ ) x[r[i]] = tmp[i]; 826 827 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 828 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 829 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 830 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 831 832 PLogFlops(2*a->nz-a->n); 833 PetscFunctionReturn(0); 834 } 835 836 #undef __FUNC__ 837 #define __FUNC__ "MatSolveTransposeAdd_SeqAIJ" 838 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx) 839 { 840 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 841 IS iscol = a->col, isrow = a->row; 842 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 843 int nz,shift = a->indexshift, *rout, *cout, *diag = a->diag; 844 Scalar *x,*b,*tmp, *aa = a->a, *v; 845 846 PetscFunctionBegin; 847 if (zz != xx) VecCopy(zz,xx); 848 849 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 850 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 851 tmp = a->solve_work; 852 853 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 854 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 855 856 /* copy the b into temp work space according to permutation */ 857 for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 858 859 /* forward solve the U^T */ 860 for ( i=0; i<n; i++ ) { 861 v = aa + diag[i] + shift; 862 vi = aj + diag[i] + (!shift); 863 nz = ai[i+1] - diag[i] - 1; 864 tmp[i] *= *v++; 865 while (nz--) { 866 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 867 } 868 } 869 870 /* backward solve the L^T */ 871 for ( i=n-1; i>=0; i-- ){ 872 v = aa + diag[i] - 1 + shift; 873 vi = aj + diag[i] - 1 + shift; 874 nz = diag[i] - ai[i]; 875 while (nz--) { 876 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 877 } 878 } 879 880 /* copy tmp into x according to permutation */ 881 for ( i=0; i<n; i++ ) x[r[i]] += tmp[i]; 882 883 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 884 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 885 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 886 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 887 888 PLogFlops(2*a->nz); 889 PetscFunctionReturn(0); 890 } 891 /* ----------------------------------------------------------------*/ 892 extern int MatMissingDiagonal_SeqAIJ(Mat); 893 894 #undef __FUNC__ 895 #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ" 896 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 897 { 898 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 899 IS isicol; 900 int *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j; 901 int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 902 int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0, dcount = 0; 903 int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 904 PetscTruth col_identity, row_identity; 905 double f; 906 907 PetscFunctionBegin; 908 if (info) { 909 f = info->fill; 910 levels = (int) info->levels; 911 diagonal_fill = (int) info->diagonal_fill; 912 } else { 913 f = 1.0; 914 levels = 0; 915 diagonal_fill = 0; 916 } 917 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 918 919 /* special case that simply copies fill pattern */ 920 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 921 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 922 if (!levels && row_identity && col_identity) { 923 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 924 (*fact)->factor = FACTOR_LU; 925 b = (Mat_SeqAIJ *) (*fact)->data; 926 if (!b->diag) { 927 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 928 } 929 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 930 b->row = isrow; 931 b->col = iscol; 932 b->icol = isicol; 933 b->solve_work = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 934 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 935 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 936 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 937 PetscFunctionReturn(0); 938 } 939 940 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 941 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 942 943 /* get new row pointers */ 944 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 945 ainew[0] = -shift; 946 /* don't know how many column pointers are needed so estimate */ 947 jmax = (int) (f*(ai[n]+!shift)); 948 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 949 /* ajfill is level of fill for each fill entry */ 950 ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill); 951 /* fill is a linked list of nonzeros in active row */ 952 fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill); 953 /* im is level for each filled value */ 954 im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im); 955 /* dloc is location of diagonal in factor */ 956 dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc); 957 dloc[0] = 0; 958 for ( prow=0; prow<n; prow++ ) { 959 960 /* copy current row into linked list */ 961 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 962 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 963 xi = aj + ai[r[prow]] + shift; 964 fill[n] = n; 965 fill[prow] = -1; /* marker to indicate if diagonal exists */ 966 while (nz--) { 967 fm = n; 968 idx = ic[*xi++ + shift]; 969 do { 970 m = fm; 971 fm = fill[m]; 972 } while (fm < idx); 973 fill[m] = idx; 974 fill[idx] = fm; 975 im[idx] = 0; 976 } 977 978 /* make sure diagonal entry is included */ 979 if (diagonal_fill && fill[prow] == -1) { 980 fm = n; 981 while (fill[fm] < prow) fm = fill[fm]; 982 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 983 fill[fm] = prow; 984 im[prow] = 0; 985 nzf++; 986 dcount++; 987 } 988 989 nzi = 0; 990 row = fill[n]; 991 while ( row < prow ) { 992 incrlev = im[row] + 1; 993 nz = dloc[row]; 994 xi = ajnew + ainew[row] + shift + nz + 1; 995 flev = ajfill + ainew[row] + shift + nz + 1; 996 nnz = ainew[row+1] - ainew[row] - nz - 1; 997 fm = row; 998 while (nnz-- > 0) { 999 idx = *xi++ + shift; 1000 if (*flev + incrlev > levels) { 1001 flev++; 1002 continue; 1003 } 1004 do { 1005 m = fm; 1006 fm = fill[m]; 1007 } while (fm < idx); 1008 if (fm != idx) { 1009 im[idx] = *flev + incrlev; 1010 fill[m] = idx; 1011 fill[idx] = fm; 1012 fm = idx; 1013 nzf++; 1014 } else { 1015 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 1016 } 1017 flev++; 1018 } 1019 row = fill[row]; 1020 nzi++; 1021 } 1022 /* copy new filled row into permanent storage */ 1023 ainew[prow+1] = ainew[prow] + nzf; 1024 if (ainew[prow+1] > jmax-shift) { 1025 1026 /* estimate how much additional space we will need */ 1027 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1028 /* just double the memory each time */ 1029 /* maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1030 int maxadd = jmax; 1031 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1032 jmax += maxadd; 1033 1034 /* allocate a longer ajnew and ajfill */ 1035 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1036 ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1037 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1038 ajnew = xi; 1039 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1040 ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1041 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1042 ajfill = xi; 1043 realloc++; /* count how many times we realloc */ 1044 } 1045 xi = ajnew + ainew[prow] + shift; 1046 flev = ajfill + ainew[prow] + shift; 1047 dloc[prow] = nzi; 1048 fm = fill[n]; 1049 while (nzf--) { 1050 *xi++ = fm - shift; 1051 *flev++ = im[fm]; 1052 fm = fill[fm]; 1053 } 1054 /* make sure row has diagonal entry */ 1055 if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 1056 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 1057 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1058 } 1059 } 1060 ierr = PetscFree(ajfill); CHKERRQ(ierr); 1061 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1062 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1063 ierr = PetscFree(fill);CHKERRQ(ierr); 1064 ierr = PetscFree(im);CHKERRQ(ierr); 1065 1066 { 1067 double af = ((double)ainew[n])/((double)ai[n]); 1068 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1069 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 1070 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 1071 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1072 if (diagonal_fill) { 1073 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount); 1074 } 1075 } 1076 1077 /* put together the new matrix */ 1078 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1079 PLogObjectParent(*fact,isicol); 1080 b = (Mat_SeqAIJ *) (*fact)->data; 1081 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1082 b->singlemalloc = PETSC_FALSE; 1083 len = (ainew[n] + shift)*sizeof(Scalar); 1084 /* the next line frees the default space generated by the Create() */ 1085 ierr = PetscFree(b->a);CHKERRQ(ierr); 1086 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1087 b->a = (Scalar *) PetscMalloc( len+1 );CHKPTRQ(b->a); 1088 b->j = ajnew; 1089 b->i = ainew; 1090 for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 1091 b->diag = dloc; 1092 b->ilen = 0; 1093 b->imax = 0; 1094 b->row = isrow; 1095 b->col = iscol; 1096 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1097 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1098 b->icol = isicol; 1099 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1100 /* In b structure: Free imax, ilen, old a, old j. 1101 Allocate dloc, solve_work, new a, new j */ 1102 PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 1103 b->maxnz = b->nz = ainew[n] + shift; 1104 (*fact)->factor = FACTOR_LU; 1105 1106 (*fact)->info.factor_mallocs = realloc; 1107 (*fact)->info.fill_ratio_given = f; 1108 (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]); 1109 (*fact)->factor = FACTOR_LU;; 1110 1111 PetscFunctionReturn(0); 1112 } 1113 1114 1115 1116 1117