1 /*$Id: aijfact.c,v 1.132 1999/12/16 23:11:29 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 7 #undef __FUNC__ 8 #define __FUNC__ "MatOrdering_Flow_SeqAIJ" 9 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 10 { 11 PetscFunctionBegin; 12 13 SETERRQ(PETSC_ERR_SUP,0,"Code not written"); 14 #if !defined(PETSC_USE_DEBUG) 15 PetscFunctionReturn(0); 16 #endif 17 } 18 19 20 extern int MatMarkDiagonal_SeqAIJ(Mat); 21 extern int Mat_AIJ_CheckInode(Mat); 22 23 #if !defined(PETSC_USE_COMPLEX) && defined(PETSC_HAVE_SAADILUDT) 24 25 #if defined(PETSC_HAVE_FORTRAN_CAPS) 26 #define dperm_ DPERM 27 #define ilutp_ ILUTP 28 #define ilu0_ ILU0 29 #define msrcsr_ MSRCSR 30 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 31 #define dperm_ dperm 32 #define ilutp_ ilutp 33 #define ilu0_ ilu0 34 #define msrcsr_ msrcsr 35 #endif 36 37 EXTERN_C_BEGIN 38 extern void dperm_(int*,double*,int*,int*,double*,int*,int*,int*,int*,int*); 39 extern void ilutp_(int*,double*,int*,int*,int*,double*,double*,int*,double*,int*,int*,int*,double*,int*,int*,int*); 40 extern void msrcsr_(int*,double*,int*,double*,int*,int*,double*,int*); 41 extern void ilu0_(int*,double*,int*,int*,double*,int*,int*,int*,int *); 42 EXTERN_C_END 43 44 #undef __FUNC__ 45 #define __FUNC__ "MatILUDTFactor_SeqAIJ" 46 /* ------------------------------------------------------------ 47 48 This interface was contribed by Tony Caola 49 50 This routine is an interface to the pivoting drop-tolerance 51 ILU routine written by Yousef Saad (saad@cs.umn.edu). It 52 was inspired by the non-pivoting iludt written by David 53 Hysom (hysom@cs.odu.edu). 54 55 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 56 help in getting this routine ironed out. 57 58 As of right now, there are a couple of things that could 59 be, uh, better. 60 61 1 - Since Saad's routine is Fortran based, memory cannot be 62 malloc'd. I was trying to get the expected fill from the 63 preconditioner and use this number as the multiplier in 64 the equation for jmax, below, but couldn't figure it out. 65 Anyway, perhaps a better solution is to run SPARSKIT through 66 f2c and incorporate mallocs(), but I want to graduate so I'll 67 just rebuild Petsc. . . 68 69 shift = 1, ishift = 0, for indices start at 1 70 shift = 0, ishift = 1, for indices starting at 0 71 ------------------------------------------------------------ 72 */ 73 74 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 75 { 76 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 77 IS iscolf, isrowf, isicol, isirow; 78 PetscTruth reorder; 79 int *c,*r,*ic,*ir, ierr, i, n = a->m; 80 int *old_i = a->i, *old_j = a->j, *new_i, *old_i2, *old_j2,*new_j; 81 int *ordcol, *iwk,*iperm, *jw; 82 int ishift = !a->indexshift,shift = -a->indexshift; 83 int jmax,lfill,job,*o_i,*o_j; 84 Scalar *old_a = a->a, *w, *new_a, *old_a2, *wk,permtol=0.0,*o_a; 85 86 PetscFunctionBegin; 87 88 if (info->dt == PETSC_DEFAULT) info->dt = .005; 89 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int) (1.5*a->rmax); 90 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 91 if (info->fill == PETSC_DEFAULT) info->fill = (n*info->dtcount)/a->nz; 92 lfill = info->dtcount/2; 93 jmax = info->fill*a->nz; 94 permtol = info->dtcol; 95 96 ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 97 ierr = ISInvertPermutation(isrow,&isirow); CHKERRQ(ierr); 98 99 100 /* ------------------------------------------------------------ 101 If reorder=.TRUE., then the original matrix has to be 102 reordered to reflect the user selected ordering scheme, and 103 then de-reordered so it is in it's original format. 104 Because Saad's dperm() is NOT in place, we have to copy 105 the original matrix and allocate more storage. . . 106 ------------------------------------------------------------ 107 */ 108 109 /* set reorder to true if either isrow or iscol is not identity */ 110 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 111 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 112 reorder = PetscNot(reorder); 113 114 115 /* storage for ilu factor */ 116 new_i = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(new_i); 117 new_j = (int *) PetscMalloc(jmax*sizeof(int)); CHKPTRQ(new_j); 118 new_a = (Scalar *) PetscMalloc(jmax*sizeof(Scalar)); CHKPTRQ(new_a); 119 120 ordcol = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordcol); 121 122 /* ------------------------------------------------------------ 123 Make sure that everything is Fortran formatted (1-Based) 124 ------------------------------------------------------------ 125 */ 126 if (ishift) { 127 for (i=old_i[0];i<old_i[n];i++) { 128 old_j[i]++; 129 } 130 for(i=0;i<n+1;i++) { 131 old_i[i]++; 132 }; 133 }; 134 135 if (reorder) { 136 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 137 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 138 for(i=0;i<n;i++) { 139 r[i] = r[i]+1; 140 c[i] = c[i]+1; 141 } 142 old_i2 = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(old_i2); 143 old_j2 = (int *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int)); CHKPTRQ(old_j2); 144 old_a2 = (Scalar *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar));CHKPTRQ(old_a2); 145 job = 3; dperm_(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 146 for (i=0;i<n;i++) { 147 r[i] = r[i]-1; 148 c[i] = c[i]-1; 149 } 150 ierr = ISRestoreIndices(iscol,&c); CHKERRQ(ierr); 151 ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 152 o_a = old_a2; 153 o_j = old_j2; 154 o_i = old_i2; 155 } else { 156 o_a = old_a; 157 o_j = old_j; 158 o_i = old_i; 159 } 160 161 /* ------------------------------------------------------------ 162 Call Saad's ilutp() routine to generate the factorization 163 ------------------------------------------------------------ 164 */ 165 166 iperm = (int *) PetscMalloc(2*n*sizeof(int)); CHKPTRQ(iperm); 167 jw = (int *) PetscMalloc(2*n*sizeof(int)); CHKPTRQ(jw); 168 w = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(w); 169 170 ilutp_(&n,o_a,o_j,o_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 171 if (ierr) { 172 switch (ierr) { 173 case -3: SETERRQ1(1,1,"ilutp(), matrix U overflows, need larger info->fill value %d",jmax); 174 break; 175 case -2: SETERRQ1(1,1,"ilutp(), matrix L overflows, need larger info->fill value %d",jmax); 176 break; 177 case -5: SETERRQ(1,1,"ilutp(), zero row encountered"); 178 break; 179 case -1: SETERRQ(1,1,"ilutp(), input matrix may be wrong"); 180 break; 181 case -4: SETERRQ1(1,1,"ilutp(), illegal info->fill value %d",jmax); 182 break; 183 default: SETERRQ1(1,1,"ilutp(), zero pivot detected on row %d",ierr); 184 } 185 } 186 187 ierr = PetscFree(w);CHKERRQ(ierr); 188 ierr = PetscFree(jw);CHKERRQ(ierr); 189 190 /* ------------------------------------------------------------ 191 Saad's routine gives the result in Modified Sparse Row (msr) 192 Convert to Compressed Sparse Row format (csr) 193 ------------------------------------------------------------ 194 */ 195 196 wk = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(wk); 197 iwk = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(iwk); 198 199 msrcsr_(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 200 201 ierr = PetscFree(iwk);CHKERRQ(ierr); 202 ierr = PetscFree(wk);CHKERRQ(ierr); 203 204 if (reorder) { 205 ierr = PetscFree(old_a2);CHKERRQ(ierr); 206 ierr = PetscFree(old_j2);CHKERRQ(ierr); 207 ierr = PetscFree(old_i2);CHKERRQ(ierr); 208 } else { 209 /* fix permutation of old_j that the factorization introduced */ 210 for (i=old_i[0]; i<=old_i[n]; i++) { 211 old_j[i-1] = iperm[old_j[i-1]-1]; 212 } 213 } 214 215 /* get rid of the shift to indices starting at 1 */ 216 if (ishift) { 217 for (i=0; i<n+1; i++) { 218 old_i[i]--; 219 } 220 for (i=old_i[0];i<old_i[n];i++) { 221 old_j[i]--; 222 } 223 } 224 225 /* Make the factored matrix 0-based */ 226 if (ishift) { 227 for (i=0; i<n+1; i++) { 228 new_i[i]--; 229 } 230 for (i=new_i[0];i<new_i[n];i++) { 231 new_j[i]--; 232 } 233 } 234 235 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 236 /*-- permute the right-hand-side and solution vectors --*/ 237 ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 238 for(i=0; i<n; i++) { 239 ordcol[i] = ic[iperm[i]-1]; 240 }; 241 ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 242 243 ierr = PetscFree(iperm);CHKERRQ(ierr); 244 245 ierr = ISCreateGeneral(PETSC_COMM_SELF, n, ordcol, &iscolf); 246 247 /*----- put together the new matrix -----*/ 248 249 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact); CHKERRQ(ierr); 250 (*fact)->factor = FACTOR_LU; 251 (*fact)->assembled = PETSC_TRUE; 252 253 b = (Mat_SeqAIJ *) (*fact)->data; 254 ierr = PetscFree(b->imax);CHKERRQ(ierr); 255 b->sorted = PETSC_FALSE; 256 b->singlemalloc = PETSC_TRUE; 257 /* the next line frees the default space generated by the MatCreate() */ 258 ierr = PetscFree(b->a);CHKERRQ(ierr); 259 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 260 b->a = new_a; 261 b->j = new_j; 262 b->i = new_i; 263 b->ilen = 0; 264 b->imax = 0; 265 b->row = isirow; 266 ierr = PetscObjectReference((PetscObject)isirow);CHKERRQ(ierr); 267 b->col = iscolf; 268 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 269 b->maxnz = b->nz = new_i[n]; 270 b->indexshift = a->indexshift; 271 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 272 (*fact)->info.factor_mallocs = 0; 273 274 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 275 276 /* check out for identical nodes. If found, use inode functions */ 277 ierr = Mat_AIJ_CheckInode(*fact);CHKERRQ(ierr); 278 279 PetscFunctionReturn(0); 280 } 281 #else 282 #undef __FUNC__ 283 #define __FUNC__ "MatILUDTFactor_SeqAIJ" 284 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 285 { 286 PetscFunctionBegin; 287 SETERRQ(1,1,"You must install Saad's ILUDT to use this"); 288 } 289 #endif 290 291 /* 292 Factorization code for AIJ format. 293 */ 294 #undef __FUNC__ 295 #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ" 296 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B) 297 { 298 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 299 IS isicol; 300 int *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j; 301 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift; 302 int *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im; 303 304 PetscFunctionBegin; 305 PetscValidHeaderSpecific(isrow,IS_COOKIE); 306 PetscValidHeaderSpecific(iscol,IS_COOKIE); 307 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square"); 308 309 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 310 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 311 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 312 313 /* get new row pointers */ 314 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 315 ainew[0] = -shift; 316 /* don't know how many column pointers are needed so estimate */ 317 jmax = (int) (f*ai[n]+(!shift)); 318 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 319 /* fill is a linked list of nonzeros in active row */ 320 fill = (int *) PetscMalloc( (2*n+1)*sizeof(int));CHKPTRQ(fill); 321 im = fill + n + 1; 322 /* idnew is location of diagonal in factor */ 323 idnew = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(idnew); 324 idnew[0] = -shift; 325 326 for ( i=0; i<n; i++ ) { 327 /* first copy previous fill into linked list */ 328 nnz = nz = ai[r[i]+1] - ai[r[i]]; 329 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 330 ajtmp = aj + ai[r[i]] + shift; 331 fill[n] = n; 332 while (nz--) { 333 fm = n; 334 idx = ic[*ajtmp++ + shift]; 335 do { 336 m = fm; 337 fm = fill[m]; 338 } while (fm < idx); 339 fill[m] = idx; 340 fill[idx] = fm; 341 } 342 row = fill[n]; 343 while ( row < i ) { 344 ajtmp = ajnew + idnew[row] + (!shift); 345 nzbd = 1 + idnew[row] - ainew[row]; 346 nz = im[row] - nzbd; 347 fm = row; 348 while (nz-- > 0) { 349 idx = *ajtmp++ + shift; 350 nzbd++; 351 if (idx == i) im[row] = nzbd; 352 do { 353 m = fm; 354 fm = fill[m]; 355 } while (fm < idx); 356 if (fm != idx) { 357 fill[m] = idx; 358 fill[idx] = fm; 359 fm = idx; 360 nnz++; 361 } 362 } 363 row = fill[row]; 364 } 365 /* copy new filled row into permanent storage */ 366 ainew[i+1] = ainew[i] + nnz; 367 if (ainew[i+1] > jmax) { 368 369 /* estimate how much additional space we will need */ 370 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 371 /* just double the memory each time */ 372 int maxadd = jmax; 373 /* maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n); */ 374 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 375 jmax += maxadd; 376 377 /* allocate a longer ajnew */ 378 ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp); 379 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 380 ierr = PetscFree(ajnew);CHKERRQ(ierr); 381 ajnew = ajtmp; 382 realloc++; /* count how many times we realloc */ 383 } 384 ajtmp = ajnew + ainew[i] + shift; 385 fm = fill[n]; 386 nzi = 0; 387 im[i] = nnz; 388 while (nnz--) { 389 if (fm < i) nzi++; 390 *ajtmp++ = fm - shift; 391 fm = fill[fm]; 392 } 393 idnew[i] = ainew[i] + nzi; 394 } 395 if (ai[n] != 0) { 396 double af = ((double)ainew[n])/((double)ai[n]); 397 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 398 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 399 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 400 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 401 } else { 402 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 403 } 404 405 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 406 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 407 408 ierr = PetscFree(fill);CHKERRQ(ierr); 409 410 /* put together the new matrix */ 411 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 412 PLogObjectParent(*B,isicol); 413 b = (Mat_SeqAIJ *) (*B)->data; 414 ierr = PetscFree(b->imax);CHKERRQ(ierr); 415 b->singlemalloc = PETSC_FALSE; 416 /* the next line frees the default space generated by the Create() */ 417 ierr = PetscFree(b->a);CHKERRQ(ierr); 418 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 419 b->a = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a); 420 b->j = ajnew; 421 b->i = ainew; 422 b->diag = idnew; 423 b->ilen = 0; 424 b->imax = 0; 425 b->row = isrow; 426 b->col = iscol; 427 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 428 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 429 b->icol = isicol; 430 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 431 /* In b structure: Free imax, ilen, old a, old j. 432 Allocate idnew, solve_work, new a, new j */ 433 PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 434 b->maxnz = b->nz = ainew[n] + shift; 435 436 (*B)->factor = FACTOR_LU;; 437 (*B)->info.factor_mallocs = realloc; 438 (*B)->info.fill_ratio_given = f; 439 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant if A has inodes */ 440 441 if (ai[n] != 0) { 442 (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[n]); 443 } else { 444 (*B)->info.fill_ratio_needed = 0.0; 445 } 446 PetscFunctionReturn(0); 447 } 448 /* ----------------------------------------------------------- */ 449 extern int Mat_AIJ_CheckInode(Mat); 450 451 #undef __FUNC__ 452 #define __FUNC__ "MatLUFactorNumeric_SeqAIJ" 453 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 454 { 455 Mat C = *B; 456 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data; 457 IS isrow = b->row, isicol = b->icol; 458 int *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j; 459 int *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift; 460 int *diag_offset = b->diag,diag,k; 461 int preserve_row_sums = (int) a->ilu_preserve_row_sums; 462 register int *pj; 463 Scalar *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0; 464 double ssum; 465 register Scalar *pv, *rtmps,*u_values; 466 467 PetscFunctionBegin; 468 469 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 470 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 471 rtmp = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) );CHKPTRQ(rtmp); 472 ierr = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr); 473 rtmps = rtmp + shift; ics = ic + shift; 474 475 /* precalculate row sums */ 476 if (preserve_row_sums) { 477 rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) );CHKPTRQ(rowsums); 478 for ( i=0; i<n; i++ ) { 479 nz = a->i[r[i]+1] - a->i[r[i]]; 480 v = a->a + a->i[r[i]] + shift; 481 sum = 0.0; 482 for ( j=0; j<nz; j++ ) sum += v[j]; 483 rowsums[i] = sum; 484 } 485 } 486 487 for ( i=0; i<n; i++ ) { 488 nz = ai[i+1] - ai[i]; 489 ajtmp = aj + ai[i] + shift; 490 for ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0; 491 492 /* load in initial (unfactored row) */ 493 nz = a->i[r[i]+1] - a->i[r[i]]; 494 ajtmpold = a->j + a->i[r[i]] + shift; 495 v = a->a + a->i[r[i]] + shift; 496 for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] = v[j]; 497 498 row = *ajtmp++ + shift; 499 while (row < i ) { 500 pc = rtmp + row; 501 if (*pc != 0.0) { 502 pv = b->a + diag_offset[row] + shift; 503 pj = b->j + diag_offset[row] + (!shift); 504 multiplier = *pc / *pv++; 505 *pc = multiplier; 506 nz = ai[row+1] - diag_offset[row] - 1; 507 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 508 PLogFlops(2*nz); 509 } 510 row = *ajtmp++ + shift; 511 } 512 /* finished row so stick it into b->a */ 513 pv = b->a + ai[i] + shift; 514 pj = b->j + ai[i] + shift; 515 nz = ai[i+1] - ai[i]; 516 for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];} 517 diag = diag_offset[i] - ai[i]; 518 /* 519 Possibly adjust diagonal entry on current row to force 520 LU matrix to have same row sum as initial matrix. 521 */ 522 if (pv[diag] == 0.0) { 523 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i); 524 } 525 if (preserve_row_sums) { 526 pj = b->j + ai[i] + shift; 527 sum = rowsums[i]; 528 for ( j=0; j<diag; j++ ) { 529 u_values = b->a + diag_offset[pj[j]] + shift; 530 nz = ai[pj[j]+1] - diag_offset[pj[j]]; 531 inner_sum = 0.0; 532 for ( k=0; k<nz; k++ ) { 533 inner_sum += u_values[k]; 534 } 535 sum -= pv[j]*inner_sum; 536 537 } 538 nz = ai[i+1] - diag_offset[i] - 1; 539 u_values = b->a + diag_offset[i] + 1 + shift; 540 for ( k=0; k<nz; k++ ) { 541 sum -= u_values[k]; 542 } 543 ssum = PetscAbsScalar(sum/pv[diag]); 544 if (ssum < 1000. && ssum > .001) pv[diag] = sum; 545 } 546 /* check pivot entry for current row */ 547 } 548 549 /* invert diagonal entries for simplier triangular solves */ 550 for ( i=0; i<n; i++ ) { 551 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 552 } 553 554 if (preserve_row_sums) {ierr = PetscFree(rowsums);CHKERRQ(ierr);} 555 ierr = PetscFree(rtmp);CHKERRQ(ierr); 556 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 557 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 558 C->factor = FACTOR_LU; 559 ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr); 560 C->assembled = PETSC_TRUE; 561 PLogFlops(b->n); 562 PetscFunctionReturn(0); 563 } 564 /* ----------------------------------------------------------- */ 565 #undef __FUNC__ 566 #define __FUNC__ "MatLUFactor_SeqAIJ" 567 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f) 568 { 569 Mat_SeqAIJ *mat = (Mat_SeqAIJ *) A->data; 570 int ierr,refct; 571 Mat C; 572 PetscOps *Abops; 573 MatOps Aops; 574 575 PetscFunctionBegin; 576 ierr = MatLUFactorSymbolic(A,row,col,f,&C);CHKERRQ(ierr); 577 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 578 579 /* free all the data structures from mat */ 580 ierr = PetscFree(mat->a);CHKERRQ(ierr); 581 if (!mat->singlemalloc) { 582 ierr = PetscFree(mat->i);CHKERRQ(ierr); 583 ierr = PetscFree(mat->j);CHKERRQ(ierr); 584 } 585 if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);} 586 if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);} 587 if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);} 588 if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);} 589 if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);} 590 if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);} 591 ierr = PetscFree(mat);CHKERRQ(ierr); 592 593 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 594 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 595 596 /* 597 This is horrible, horrible code. We need to keep the 598 A pointers for the bops and ops but copy everything 599 else from C. 600 */ 601 Abops = A->bops; 602 Aops = A->ops; 603 refct = A->refct; 604 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 605 mat = (Mat_SeqAIJ *) A->data; 606 PLogObjectParent(A,mat->icol); 607 608 A->bops = Abops; 609 A->ops = Aops; 610 A->qlist = 0; 611 A->refct = refct; 612 /* copy over the type_name and name */ 613 ierr = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr); 614 ierr = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr); 615 616 PetscHeaderDestroy(C); 617 618 PetscFunctionReturn(0); 619 } 620 /* ----------------------------------------------------------- */ 621 #undef __FUNC__ 622 #define __FUNC__ "MatSolve_SeqAIJ" 623 int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx) 624 { 625 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 626 IS iscol = a->col, isrow = a->row; 627 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 628 int nz,shift = a->indexshift,*rout,*cout; 629 Scalar *x,*b,*tmp, *tmps, *aa = a->a, sum, *v; 630 631 PetscFunctionBegin; 632 if (!n) PetscFunctionReturn(0); 633 634 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 635 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 636 tmp = a->solve_work; 637 638 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 639 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 640 641 /* forward solve the lower triangular */ 642 tmp[0] = b[*r++]; 643 tmps = tmp + shift; 644 for ( i=1; i<n; i++ ) { 645 v = aa + ai[i] + shift; 646 vi = aj + ai[i] + shift; 647 nz = a->diag[i] - ai[i]; 648 sum = b[*r++]; 649 while (nz--) sum -= *v++ * tmps[*vi++]; 650 tmp[i] = sum; 651 } 652 653 /* backward solve the upper triangular */ 654 for ( i=n-1; i>=0; i-- ){ 655 v = aa + a->diag[i] + (!shift); 656 vi = aj + a->diag[i] + (!shift); 657 nz = ai[i+1] - a->diag[i] - 1; 658 sum = tmp[i]; 659 while (nz--) sum -= *v++ * tmps[*vi++]; 660 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 661 } 662 663 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 664 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 665 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 666 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 667 PLogFlops(2*a->nz - a->n); 668 PetscFunctionReturn(0); 669 } 670 671 /* ----------------------------------------------------------- */ 672 #undef __FUNC__ 673 #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering" 674 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb, Vec xx) 675 { 676 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 677 int n = a->m, *ai = a->i, *aj = a->j, *adiag = a->diag,ierr; 678 Scalar *x,*b, *aa = a->a, sum; 679 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 680 int adiag_i,i,*vi,nz,ai_i; 681 Scalar *v; 682 #endif 683 684 PetscFunctionBegin; 685 if (!n) PetscFunctionReturn(0); 686 if (a->indexshift) { 687 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 692 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 693 694 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 695 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 696 #else 697 /* forward solve the lower triangular */ 698 x[0] = b[0]; 699 for ( i=1; i<n; i++ ) { 700 ai_i = ai[i]; 701 v = aa + ai_i; 702 vi = aj + ai_i; 703 nz = adiag[i] - ai_i; 704 sum = b[i]; 705 while (nz--) sum -= *v++ * x[*vi++]; 706 x[i] = sum; 707 } 708 709 /* backward solve the upper triangular */ 710 for ( i=n-1; i>=0; i-- ){ 711 adiag_i = adiag[i]; 712 v = aa + adiag_i + 1; 713 vi = aj + adiag_i + 1; 714 nz = ai[i+1] - adiag_i - 1; 715 sum = x[i]; 716 while (nz--) sum -= *v++ * x[*vi++]; 717 x[i] = sum*aa[adiag_i]; 718 } 719 #endif 720 PLogFlops(2*a->nz - a->n); 721 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 722 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 723 PetscFunctionReturn(0); 724 } 725 726 #undef __FUNC__ 727 #define __FUNC__ "MatSolveAdd_SeqAIJ" 728 int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx) 729 { 730 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 731 IS iscol = a->col, isrow = a->row; 732 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 733 int nz, shift = a->indexshift,*rout,*cout; 734 Scalar *x,*b,*tmp, *aa = a->a, sum, *v; 735 736 PetscFunctionBegin; 737 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 738 739 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 740 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 741 tmp = a->solve_work; 742 743 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 744 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 745 746 /* forward solve the lower triangular */ 747 tmp[0] = b[*r++]; 748 for ( i=1; i<n; i++ ) { 749 v = aa + ai[i] + shift; 750 vi = aj + ai[i] + shift; 751 nz = a->diag[i] - ai[i]; 752 sum = b[*r++]; 753 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 754 tmp[i] = sum; 755 } 756 757 /* backward solve the upper triangular */ 758 for ( i=n-1; i>=0; i-- ){ 759 v = aa + a->diag[i] + (!shift); 760 vi = aj + a->diag[i] + (!shift); 761 nz = ai[i+1] - a->diag[i] - 1; 762 sum = tmp[i]; 763 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 764 tmp[i] = sum*aa[a->diag[i]+shift]; 765 x[*c--] += tmp[i]; 766 } 767 768 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 769 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 770 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 771 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 772 PLogFlops(2*a->nz); 773 774 PetscFunctionReturn(0); 775 } 776 /* -------------------------------------------------------------------*/ 777 #undef __FUNC__ 778 #define __FUNC__ "MatSolveTranspose_SeqAIJ" 779 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb, Vec xx) 780 { 781 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 782 IS iscol = a->col, isrow = a->row; 783 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 784 int nz,shift = a->indexshift,*rout,*cout, *diag = a->diag; 785 Scalar *x,*b,*tmp, *aa = a->a, *v, s1; 786 787 PetscFunctionBegin; 788 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 789 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 790 tmp = a->solve_work; 791 792 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 793 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 794 795 /* copy the b into temp work space according to permutation */ 796 for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 797 798 /* forward solve the U^T */ 799 for ( i=0; i<n; i++ ) { 800 v = aa + diag[i] + shift; 801 vi = aj + diag[i] + (!shift); 802 nz = ai[i+1] - diag[i] - 1; 803 s1 = tmp[i]; 804 s1 *= *(v++); /* multiply by inverse of diagonal entry */ 805 while (nz--) { 806 tmp[*vi++ + shift] -= (*v++)*s1; 807 } 808 tmp[i] = s1; 809 } 810 811 /* backward solve the L^T */ 812 for ( i=n-1; i>=0; i-- ){ 813 v = aa + diag[i] - 1 + shift; 814 vi = aj + diag[i] - 1 + shift; 815 nz = diag[i] - ai[i]; 816 s1 = tmp[i]; 817 while (nz--) { 818 tmp[*vi-- + shift] -= (*v--)*s1; 819 } 820 } 821 822 /* copy tmp into x according to permutation */ 823 for ( i=0; i<n; i++ ) x[r[i]] = tmp[i]; 824 825 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 826 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 827 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 828 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 829 830 PLogFlops(2*a->nz-a->n); 831 PetscFunctionReturn(0); 832 } 833 834 #undef __FUNC__ 835 #define __FUNC__ "MatSolveTransposeAdd_SeqAIJ" 836 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx) 837 { 838 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 839 IS iscol = a->col, isrow = a->row; 840 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 841 int nz,shift = a->indexshift, *rout, *cout, *diag = a->diag; 842 Scalar *x,*b,*tmp, *aa = a->a, *v; 843 844 PetscFunctionBegin; 845 if (zz != xx) VecCopy(zz,xx); 846 847 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 848 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 849 tmp = a->solve_work; 850 851 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 852 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 853 854 /* copy the b into temp work space according to permutation */ 855 for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 856 857 /* forward solve the U^T */ 858 for ( i=0; i<n; i++ ) { 859 v = aa + diag[i] + shift; 860 vi = aj + diag[i] + (!shift); 861 nz = ai[i+1] - diag[i] - 1; 862 tmp[i] *= *v++; 863 while (nz--) { 864 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 865 } 866 } 867 868 /* backward solve the L^T */ 869 for ( i=n-1; i>=0; i-- ){ 870 v = aa + diag[i] - 1 + shift; 871 vi = aj + diag[i] - 1 + shift; 872 nz = diag[i] - ai[i]; 873 while (nz--) { 874 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 875 } 876 } 877 878 /* copy tmp into x according to permutation */ 879 for ( i=0; i<n; i++ ) x[r[i]] += tmp[i]; 880 881 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 882 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 883 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 884 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 885 886 PLogFlops(2*a->nz); 887 PetscFunctionReturn(0); 888 } 889 /* ----------------------------------------------------------------*/ 890 extern int MatMissingDiagonal_SeqAIJ(Mat); 891 892 #undef __FUNC__ 893 #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ" 894 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 895 { 896 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 897 IS isicol; 898 int *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j; 899 int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 900 int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0, dcount = 0; 901 int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 902 PetscTruth col_identity, row_identity; 903 double f; 904 905 PetscFunctionBegin; 906 if (info) { 907 f = info->fill; 908 levels = (int) info->levels; 909 diagonal_fill = (int) info->diagonal_fill; 910 } else { 911 f = 1.0; 912 levels = 0; 913 diagonal_fill = 0; 914 } 915 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 916 917 /* special case that simply copies fill pattern */ 918 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 919 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 920 if (!levels && row_identity && col_identity) { 921 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 922 (*fact)->factor = FACTOR_LU; 923 b = (Mat_SeqAIJ *) (*fact)->data; 924 if (!b->diag) { 925 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 926 } 927 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 928 b->row = isrow; 929 b->col = iscol; 930 b->icol = isicol; 931 b->solve_work = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 932 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 933 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 934 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 935 PetscFunctionReturn(0); 936 } 937 938 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 939 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 940 941 /* get new row pointers */ 942 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 943 ainew[0] = -shift; 944 /* don't know how many column pointers are needed so estimate */ 945 jmax = (int) (f*(ai[n]+!shift)); 946 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 947 /* ajfill is level of fill for each fill entry */ 948 ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill); 949 /* fill is a linked list of nonzeros in active row */ 950 fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill); 951 /* im is level for each filled value */ 952 im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im); 953 /* dloc is location of diagonal in factor */ 954 dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc); 955 dloc[0] = 0; 956 for ( prow=0; prow<n; prow++ ) { 957 958 /* copy current row into linked list */ 959 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 960 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 961 xi = aj + ai[r[prow]] + shift; 962 fill[n] = n; 963 fill[prow] = -1; /* marker to indicate if diagonal exists */ 964 while (nz--) { 965 fm = n; 966 idx = ic[*xi++ + shift]; 967 do { 968 m = fm; 969 fm = fill[m]; 970 } while (fm < idx); 971 fill[m] = idx; 972 fill[idx] = fm; 973 im[idx] = 0; 974 } 975 976 /* make sure diagonal entry is included */ 977 if (diagonal_fill && fill[prow] == -1) { 978 fm = n; 979 while (fill[fm] < prow) fm = fill[fm]; 980 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 981 fill[fm] = prow; 982 im[prow] = 0; 983 nzf++; 984 dcount++; 985 } 986 987 nzi = 0; 988 row = fill[n]; 989 while ( row < prow ) { 990 incrlev = im[row] + 1; 991 nz = dloc[row]; 992 xi = ajnew + ainew[row] + shift + nz + 1; 993 flev = ajfill + ainew[row] + shift + nz + 1; 994 nnz = ainew[row+1] - ainew[row] - nz - 1; 995 fm = row; 996 while (nnz-- > 0) { 997 idx = *xi++ + shift; 998 if (*flev + incrlev > levels) { 999 flev++; 1000 continue; 1001 } 1002 do { 1003 m = fm; 1004 fm = fill[m]; 1005 } while (fm < idx); 1006 if (fm != idx) { 1007 im[idx] = *flev + incrlev; 1008 fill[m] = idx; 1009 fill[idx] = fm; 1010 fm = idx; 1011 nzf++; 1012 } else { 1013 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 1014 } 1015 flev++; 1016 } 1017 row = fill[row]; 1018 nzi++; 1019 } 1020 /* copy new filled row into permanent storage */ 1021 ainew[prow+1] = ainew[prow] + nzf; 1022 if (ainew[prow+1] > jmax-shift) { 1023 1024 /* estimate how much additional space we will need */ 1025 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1026 /* just double the memory each time */ 1027 /* maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1028 int maxadd = jmax; 1029 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1030 jmax += maxadd; 1031 1032 /* allocate a longer ajnew and ajfill */ 1033 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1034 ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1035 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1036 ajnew = xi; 1037 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1038 ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1039 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1040 ajfill = xi; 1041 realloc++; /* count how many times we realloc */ 1042 } 1043 xi = ajnew + ainew[prow] + shift; 1044 flev = ajfill + ainew[prow] + shift; 1045 dloc[prow] = nzi; 1046 fm = fill[n]; 1047 while (nzf--) { 1048 *xi++ = fm - shift; 1049 *flev++ = im[fm]; 1050 fm = fill[fm]; 1051 } 1052 /* make sure row has diagonal entry */ 1053 if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 1054 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 1055 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1056 } 1057 } 1058 ierr = PetscFree(ajfill); CHKERRQ(ierr); 1059 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1060 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1061 ierr = PetscFree(fill);CHKERRQ(ierr); 1062 ierr = PetscFree(im);CHKERRQ(ierr); 1063 1064 { 1065 double af = ((double)ainew[n])/((double)ai[n]); 1066 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1067 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 1068 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 1069 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1070 if (diagonal_fill) { 1071 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount); 1072 } 1073 } 1074 1075 /* put together the new matrix */ 1076 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1077 PLogObjectParent(*fact,isicol); 1078 b = (Mat_SeqAIJ *) (*fact)->data; 1079 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1080 b->singlemalloc = PETSC_FALSE; 1081 len = (ainew[n] + shift)*sizeof(Scalar); 1082 /* the next line frees the default space generated by the Create() */ 1083 ierr = PetscFree(b->a);CHKERRQ(ierr); 1084 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1085 b->a = (Scalar *) PetscMalloc( len+1 );CHKPTRQ(b->a); 1086 b->j = ajnew; 1087 b->i = ainew; 1088 for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 1089 b->diag = dloc; 1090 b->ilen = 0; 1091 b->imax = 0; 1092 b->row = isrow; 1093 b->col = iscol; 1094 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1095 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1096 b->icol = isicol; 1097 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1098 /* In b structure: Free imax, ilen, old a, old j. 1099 Allocate dloc, solve_work, new a, new j */ 1100 PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 1101 b->maxnz = b->nz = ainew[n] + shift; 1102 (*fact)->factor = FACTOR_LU; 1103 1104 (*fact)->info.factor_mallocs = realloc; 1105 (*fact)->info.fill_ratio_given = f; 1106 (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]); 1107 (*fact)->factor = FACTOR_LU;; 1108 1109 PetscFunctionReturn(0); 1110 } 1111 1112 1113 1114 1115