1 /*$Id: aijfact.c,v 1.130 1999/12/16 15:57:45 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 7 #undef __FUNC__ 8 #define __FUNC__ "MatOrdering_Flow_SeqAIJ" 9 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 10 { 11 PetscFunctionBegin; 12 13 SETERRQ(PETSC_ERR_SUP,0,"Code not written"); 14 #if !defined(PETSC_USE_DEBUG) 15 PetscFunctionReturn(0); 16 #endif 17 } 18 19 20 extern int MatMarkDiagonal_SeqAIJ(Mat); 21 extern int Mat_AIJ_CheckInode(Mat); 22 23 #if !defined(PETSC_USE_COMPLEX) && defined(PETSC_HAVE_SAADILUDT) 24 25 #if defined(PETSC_HAVE_FORTRAN_CAPS) 26 #define dperm_ DPERM 27 #define ilutp_ ILUTP 28 #define ilu0_ ILU0 29 #define msrcsr_ MSRCSR 30 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 31 #define dperm_ dperm 32 #define ilutp_ ilutp 33 #define ilu0_ ilu0 34 #define msrcsr_ msrcsr 35 #endif 36 37 EXTERN_C_BEGIN 38 extern void dperm_(int*,double*,int*,int*,double*,int*,int*,int*,int*,int*); 39 extern void ilutp_(int*,double*,int*,int*,int*,double*,double*,int*,double*,int*,int*,int*,double*,int*,int*,int*); 40 extern void msrcsr_(int*,double*,int*,double*,int*,int*,double*,int*); 41 extern void ilu0_(int*,double*,int*,int*,double*,int*,int*,int*,int *); 42 EXTERN_C_END 43 44 #undef __FUNC__ 45 #define __FUNC__ "MatILUDTFactor_SeqAIJ" 46 /* ------------------------------------------------------------ 47 48 This interface was contribed by Tony Caola 49 50 This routine is an interface to the pivoting drop-tolerance 51 ILU routine written by Yousef Saad (saad@cs.umn.edu). It 52 was inspired by the non-pivoting iludt written by David 53 Hysom (hysom@cs.odu.edu). 54 55 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 56 help in getting this routine ironed out. 57 58 As of right now, there are a couple of things that could 59 be, uh, better. 60 61 1 - Since Saad's routine is Fortran based, memory cannot be 62 malloc'd. I was trying to get the expected fill from the 63 preconditioner and use this number as the multiplier in 64 the equation for jmax, below, but couldn't figure it out. 65 Anyway, perhaps a better solution is to run SPARSKIT through 66 f2c and incorporate mallocs(), but I want to graduate so I'll 67 just rebuild Petsc. . . 68 69 shift = 1, ishift = 0, for indices start at 1 70 shift = 0, ishift = 1, for indices starting at 0 71 ------------------------------------------------------------ 72 */ 73 74 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 75 { 76 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 77 IS iscolf, isrowf, isicol, isirow; 78 PetscTruth reorder; 79 int *c,*r,*ic,*ir, ierr, i, n = a->m; 80 int *old_i = a->i, *old_j = a->j, *new_i, *old_i2, *old_j2,*new_j; 81 int *ordcol, *ordrow,*iwk,*iperm, *jw; 82 int ishift = !a->indexshift,shift = -a->indexshift; 83 int jmax,lfill,job,*o_i,*o_j; 84 Scalar *old_a = a->a, *w, *new_a, *old_a2, *wk,permtol=0.0,*o_a; 85 86 PetscFunctionBegin; 87 88 if (info->dt == PETSC_DEFAULT) info->dt = .005; 89 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int) (1.5*a->rmax); 90 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 91 if (info->fill == PETSC_DEFAULT) info->fill = (n*info->dtcount)/a->nz; 92 lfill = info->dtcount/2; 93 jmax = info->fill*a->nz; 94 permtol = info->dtcol; 95 96 ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 97 ierr = ISInvertPermutation(isrow,&isirow); CHKERRQ(ierr); 98 99 100 /* ------------------------------------------------------------ 101 If reorder=.TRUE., then the original matrix has to be 102 reordered to reflect the user selected ordering scheme, and 103 then de-reordered so it is in it's original format. 104 Because Saad's dperm() is NOT in place, we have to copy 105 the original matrix and allocate more storage. . . 106 ------------------------------------------------------------ 107 */ 108 109 /* set reorder to true if either isrow or iscol is not identity */ 110 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 111 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 112 reorder = PetscNot(reorder); 113 114 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 116 ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 117 ierr = ISGetIndices(isirow,&ir); CHKERRQ(ierr); 118 119 /* storage for ilu factor */ 120 new_i = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(new_i); 121 new_j = (int *) PetscMalloc(jmax*sizeof(int)); CHKPTRQ(new_j); 122 new_a = (Scalar *) PetscMalloc(jmax*sizeof(Scalar)); CHKPTRQ(new_a); 123 124 if (reorder) { 125 old_i2 = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(old_i2); 126 old_j2 = (int *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int)); CHKPTRQ(old_j2); 127 old_a2 = (Scalar *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar));CHKPTRQ(old_a2); 128 } 129 130 ordcol = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordcol); 131 ordrow = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordrow); 132 133 /* ------------------------------------------------------------ 134 Make sure that everything is Fortran formatted (1-Based) 135 ------------------------------------------------------------ 136 */ 137 if (ishift) { 138 for (i=old_i[0];i<old_i[n];i++) { 139 old_j[i]++; 140 } 141 for(i=0;i<n+1;i++) { 142 old_i[i]++; 143 }; 144 }; 145 146 147 for(i=0;i<n;i++) { 148 r[i] = r[i]+1; 149 c[i] = c[i]+1; 150 ir[i] = ir[i]+1; 151 ic[i] = ic[i]+1; 152 } 153 154 if (reorder) { 155 job = 3; 156 dperm_(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 157 o_a = old_a2; 158 o_j = old_j2; 159 o_i = old_i2; 160 } else { 161 o_a = old_a; 162 o_j = old_j; 163 o_i = old_i; 164 } 165 166 /* ------------------------------------------------------------ 167 Call Saad's ilutp() routine to generate the factorization 168 ------------------------------------------------------------ 169 */ 170 171 iperm = (int *) PetscMalloc(2*n*sizeof(int)); CHKPTRQ(iperm); 172 jw = (int *) PetscMalloc(2*n*sizeof(int)); CHKPTRQ(jw); 173 w = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(w); 174 175 ilutp_(&n,o_a,o_j,o_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 176 if (ierr) { 177 switch (ierr) { 178 case -3: SETERRQ1(1,1,"ilutp(), matrix U overflows, need larger info->fill value %d",jmax); 179 break; 180 case -2: SETERRQ1(1,1,"ilutp(), matrix L overflows, need larger info->fill value %d",jmax); 181 break; 182 case -5: SETERRQ(1,1,"ilutp(), zero row encountered"); 183 break; 184 case -1: SETERRQ(1,1,"ilutp(), input matrix may be wrong"); 185 break; 186 case -4: SETERRQ1(1,1,"ilutp(), illegal info->fill value %d",jmax); 187 break; 188 default: SETERRQ1(1,1,"ilutp(), zero pivot detected on row %d",ierr); 189 } 190 } 191 192 ierr = PetscFree(w);CHKERRQ(ierr); 193 ierr = PetscFree(jw);CHKERRQ(ierr); 194 195 196 /* ------------------------------------------------------------ 197 Saad's routine gives the result in Modified Sparse Row (msr) 198 Convert to Compressed Sparse Row format (csr) 199 ------------------------------------------------------------ 200 */ 201 202 wk = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(wk); 203 iwk = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(iwk); 204 205 msrcsr_(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 206 207 ierr = PetscFree(iwk);CHKERRQ(ierr); 208 ierr = PetscFree(wk);CHKERRQ(ierr); 209 210 if (reorder) { 211 ierr = PetscFree(old_a2);CHKERRQ(ierr); 212 ierr = PetscFree(old_j2);CHKERRQ(ierr); 213 ierr = PetscFree(old_i2);CHKERRQ(ierr); 214 } 215 216 /* fix permutation of old_j that the factorization introduced */ 217 if (!reorder) { 218 for (i=old_i[0]; i<=old_i[n]; i++) { 219 old_j[i-1] = iperm[old_j[i-1]-1]; 220 } 221 } 222 223 /* get rid of the shift to indices starting at 1 */ 224 if (ishift) { 225 for (i=0; i<n+1; i++) { 226 old_i[i]--; 227 } 228 for (i=old_i[0];i<old_i[n];i++) { 229 old_j[i]--; 230 } 231 } 232 233 /* Make the factored matrix 0-based */ 234 if (ishift) { 235 for (i=0; i<n+1; i++) { 236 new_i[i]--; 237 } 238 for (i=new_i[0];i<new_i[n];i++) { 239 new_j[i]--; 240 } 241 } 242 243 for (i=0;i<n;i++) { 244 r[i] = r[i]-1; 245 c[i] = c[i]-1; 246 ir[i] = ir[i]-1; 247 ic[i] = ic[i]-1; 248 } 249 250 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 251 /*-- permute the right-hand-side and solution vectors --*/ 252 for(i=0; i<n; i++) { 253 ordcol[i] = ic[iperm[i]-1]; 254 ordrow[i] = ir[i]; 255 }; 256 257 ierr = PetscFree(iperm);CHKERRQ(ierr); 258 259 ierr = ISRestoreIndices(iscol,&c); CHKERRQ(ierr); 260 ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 261 262 ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 263 ierr = ISRestoreIndices(isirow,&ir); CHKERRQ(ierr); 264 265 ierr = ISCreateGeneral(PETSC_COMM_SELF, n, ordcol, &iscolf); 266 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordrow,&isrowf); 267 268 /*----- put together the new matrix -----*/ 269 270 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact); CHKERRQ(ierr); 271 (*fact)->factor = FACTOR_LU; 272 (*fact)->assembled = PETSC_TRUE; 273 274 b = (Mat_SeqAIJ *) (*fact)->data; 275 ierr = PetscFree(b->imax);CHKERRQ(ierr); 276 b->sorted = PETSC_FALSE; 277 b->singlemalloc = PETSC_TRUE; 278 /* the next line frees the default space generated by the MatCreate() */ 279 ierr = PetscFree(b->a);CHKERRQ(ierr); 280 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 281 b->a = new_a; 282 b->j = new_j; 283 b->i = new_i; 284 b->ilen = 0; 285 b->imax = 0; 286 b->row = isrowf; 287 b->col = iscolf; 288 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 289 b->maxnz = b->nz = new_i[n]; 290 b->indexshift = a->indexshift; 291 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 292 (*fact)->info.factor_mallocs = 0; 293 294 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 295 296 /* check out for identical nodes. If found, use inode functions */ 297 ierr = Mat_AIJ_CheckInode(*fact);CHKERRQ(ierr); 298 299 PetscFunctionReturn(0); 300 } 301 #else 302 #undef __FUNC__ 303 #define __FUNC__ "MatILUDTFactor_SeqAIJ" 304 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 305 { 306 PetscFunctionBegin; 307 SETERRQ(1,1,"You must install Saad's ILUDT to use this"); 308 } 309 #endif 310 311 /* 312 Factorization code for AIJ format. 313 */ 314 #undef __FUNC__ 315 #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ" 316 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B) 317 { 318 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 319 IS isicol; 320 int *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j; 321 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift; 322 int *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im; 323 324 PetscFunctionBegin; 325 PetscValidHeaderSpecific(isrow,IS_COOKIE); 326 PetscValidHeaderSpecific(iscol,IS_COOKIE); 327 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square"); 328 329 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 330 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 331 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 332 333 /* get new row pointers */ 334 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 335 ainew[0] = -shift; 336 /* don't know how many column pointers are needed so estimate */ 337 jmax = (int) (f*ai[n]+(!shift)); 338 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 339 /* fill is a linked list of nonzeros in active row */ 340 fill = (int *) PetscMalloc( (2*n+1)*sizeof(int));CHKPTRQ(fill); 341 im = fill + n + 1; 342 /* idnew is location of diagonal in factor */ 343 idnew = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(idnew); 344 idnew[0] = -shift; 345 346 for ( i=0; i<n; i++ ) { 347 /* first copy previous fill into linked list */ 348 nnz = nz = ai[r[i]+1] - ai[r[i]]; 349 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 350 ajtmp = aj + ai[r[i]] + shift; 351 fill[n] = n; 352 while (nz--) { 353 fm = n; 354 idx = ic[*ajtmp++ + shift]; 355 do { 356 m = fm; 357 fm = fill[m]; 358 } while (fm < idx); 359 fill[m] = idx; 360 fill[idx] = fm; 361 } 362 row = fill[n]; 363 while ( row < i ) { 364 ajtmp = ajnew + idnew[row] + (!shift); 365 nzbd = 1 + idnew[row] - ainew[row]; 366 nz = im[row] - nzbd; 367 fm = row; 368 while (nz-- > 0) { 369 idx = *ajtmp++ + shift; 370 nzbd++; 371 if (idx == i) im[row] = nzbd; 372 do { 373 m = fm; 374 fm = fill[m]; 375 } while (fm < idx); 376 if (fm != idx) { 377 fill[m] = idx; 378 fill[idx] = fm; 379 fm = idx; 380 nnz++; 381 } 382 } 383 row = fill[row]; 384 } 385 /* copy new filled row into permanent storage */ 386 ainew[i+1] = ainew[i] + nnz; 387 if (ainew[i+1] > jmax) { 388 389 /* estimate how much additional space we will need */ 390 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 391 /* just double the memory each time */ 392 int maxadd = jmax; 393 /* maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n); */ 394 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 395 jmax += maxadd; 396 397 /* allocate a longer ajnew */ 398 ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp); 399 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 400 ierr = PetscFree(ajnew);CHKERRQ(ierr); 401 ajnew = ajtmp; 402 realloc++; /* count how many times we realloc */ 403 } 404 ajtmp = ajnew + ainew[i] + shift; 405 fm = fill[n]; 406 nzi = 0; 407 im[i] = nnz; 408 while (nnz--) { 409 if (fm < i) nzi++; 410 *ajtmp++ = fm - shift; 411 fm = fill[fm]; 412 } 413 idnew[i] = ainew[i] + nzi; 414 } 415 if (ai[n] != 0) { 416 double af = ((double)ainew[n])/((double)ai[n]); 417 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 418 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 419 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 420 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 421 } else { 422 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 423 } 424 425 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 426 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 427 428 ierr = PetscFree(fill);CHKERRQ(ierr); 429 430 /* put together the new matrix */ 431 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 432 PLogObjectParent(*B,isicol); 433 b = (Mat_SeqAIJ *) (*B)->data; 434 ierr = PetscFree(b->imax);CHKERRQ(ierr); 435 b->singlemalloc = PETSC_FALSE; 436 /* the next line frees the default space generated by the Create() */ 437 ierr = PetscFree(b->a);CHKERRQ(ierr); 438 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 439 b->a = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a); 440 b->j = ajnew; 441 b->i = ainew; 442 b->diag = idnew; 443 b->ilen = 0; 444 b->imax = 0; 445 b->row = isrow; 446 b->col = iscol; 447 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 448 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 449 b->icol = isicol; 450 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 451 /* In b structure: Free imax, ilen, old a, old j. 452 Allocate idnew, solve_work, new a, new j */ 453 PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 454 b->maxnz = b->nz = ainew[n] + shift; 455 456 (*B)->factor = FACTOR_LU;; 457 (*B)->info.factor_mallocs = realloc; 458 (*B)->info.fill_ratio_given = f; 459 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant if A has inodes */ 460 461 if (ai[n] != 0) { 462 (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[n]); 463 } else { 464 (*B)->info.fill_ratio_needed = 0.0; 465 } 466 PetscFunctionReturn(0); 467 } 468 /* ----------------------------------------------------------- */ 469 extern int Mat_AIJ_CheckInode(Mat); 470 471 #undef __FUNC__ 472 #define __FUNC__ "MatLUFactorNumeric_SeqAIJ" 473 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 474 { 475 Mat C = *B; 476 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data; 477 IS isrow = b->row, isicol = b->icol; 478 int *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j; 479 int *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift; 480 int *diag_offset = b->diag,diag,k; 481 int preserve_row_sums = (int) a->ilu_preserve_row_sums; 482 register int *pj; 483 Scalar *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0; 484 double ssum; 485 register Scalar *pv, *rtmps,*u_values; 486 487 PetscFunctionBegin; 488 489 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 490 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 491 rtmp = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) );CHKPTRQ(rtmp); 492 ierr = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr); 493 rtmps = rtmp + shift; ics = ic + shift; 494 495 /* precalculate row sums */ 496 if (preserve_row_sums) { 497 rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) );CHKPTRQ(rowsums); 498 for ( i=0; i<n; i++ ) { 499 nz = a->i[r[i]+1] - a->i[r[i]]; 500 v = a->a + a->i[r[i]] + shift; 501 sum = 0.0; 502 for ( j=0; j<nz; j++ ) sum += v[j]; 503 rowsums[i] = sum; 504 } 505 } 506 507 for ( i=0; i<n; i++ ) { 508 nz = ai[i+1] - ai[i]; 509 ajtmp = aj + ai[i] + shift; 510 for ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0; 511 512 /* load in initial (unfactored row) */ 513 nz = a->i[r[i]+1] - a->i[r[i]]; 514 ajtmpold = a->j + a->i[r[i]] + shift; 515 v = a->a + a->i[r[i]] + shift; 516 for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] = v[j]; 517 518 row = *ajtmp++ + shift; 519 while (row < i ) { 520 pc = rtmp + row; 521 if (*pc != 0.0) { 522 pv = b->a + diag_offset[row] + shift; 523 pj = b->j + diag_offset[row] + (!shift); 524 multiplier = *pc / *pv++; 525 *pc = multiplier; 526 nz = ai[row+1] - diag_offset[row] - 1; 527 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 528 PLogFlops(2*nz); 529 } 530 row = *ajtmp++ + shift; 531 } 532 /* finished row so stick it into b->a */ 533 pv = b->a + ai[i] + shift; 534 pj = b->j + ai[i] + shift; 535 nz = ai[i+1] - ai[i]; 536 for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];} 537 diag = diag_offset[i] - ai[i]; 538 /* 539 Possibly adjust diagonal entry on current row to force 540 LU matrix to have same row sum as initial matrix. 541 */ 542 if (pv[diag] == 0.0) { 543 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i); 544 } 545 if (preserve_row_sums) { 546 pj = b->j + ai[i] + shift; 547 sum = rowsums[i]; 548 for ( j=0; j<diag; j++ ) { 549 u_values = b->a + diag_offset[pj[j]] + shift; 550 nz = ai[pj[j]+1] - diag_offset[pj[j]]; 551 inner_sum = 0.0; 552 for ( k=0; k<nz; k++ ) { 553 inner_sum += u_values[k]; 554 } 555 sum -= pv[j]*inner_sum; 556 557 } 558 nz = ai[i+1] - diag_offset[i] - 1; 559 u_values = b->a + diag_offset[i] + 1 + shift; 560 for ( k=0; k<nz; k++ ) { 561 sum -= u_values[k]; 562 } 563 ssum = PetscAbsScalar(sum/pv[diag]); 564 if (ssum < 1000. && ssum > .001) pv[diag] = sum; 565 } 566 /* check pivot entry for current row */ 567 } 568 569 /* invert diagonal entries for simplier triangular solves */ 570 for ( i=0; i<n; i++ ) { 571 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 572 } 573 574 if (preserve_row_sums) {ierr = PetscFree(rowsums);CHKERRQ(ierr);} 575 ierr = PetscFree(rtmp);CHKERRQ(ierr); 576 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 577 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 578 C->factor = FACTOR_LU; 579 ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr); 580 C->assembled = PETSC_TRUE; 581 PLogFlops(b->n); 582 PetscFunctionReturn(0); 583 } 584 /* ----------------------------------------------------------- */ 585 #undef __FUNC__ 586 #define __FUNC__ "MatLUFactor_SeqAIJ" 587 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f) 588 { 589 Mat_SeqAIJ *mat = (Mat_SeqAIJ *) A->data; 590 int ierr,refct; 591 Mat C; 592 PetscOps *Abops; 593 MatOps Aops; 594 595 PetscFunctionBegin; 596 ierr = MatLUFactorSymbolic(A,row,col,f,&C);CHKERRQ(ierr); 597 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 598 599 /* free all the data structures from mat */ 600 ierr = PetscFree(mat->a);CHKERRQ(ierr); 601 if (!mat->singlemalloc) { 602 ierr = PetscFree(mat->i);CHKERRQ(ierr); 603 ierr = PetscFree(mat->j);CHKERRQ(ierr); 604 } 605 if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);} 606 if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);} 607 if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);} 608 if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);} 609 if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);} 610 if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);} 611 ierr = PetscFree(mat);CHKERRQ(ierr); 612 613 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 614 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 615 616 /* 617 This is horrible, horrible code. We need to keep the 618 A pointers for the bops and ops but copy everything 619 else from C. 620 */ 621 Abops = A->bops; 622 Aops = A->ops; 623 refct = A->refct; 624 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 625 mat = (Mat_SeqAIJ *) A->data; 626 PLogObjectParent(A,mat->icol); 627 628 A->bops = Abops; 629 A->ops = Aops; 630 A->qlist = 0; 631 A->refct = refct; 632 /* copy over the type_name and name */ 633 ierr = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr); 634 ierr = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr); 635 636 PetscHeaderDestroy(C); 637 638 PetscFunctionReturn(0); 639 } 640 /* ----------------------------------------------------------- */ 641 #undef __FUNC__ 642 #define __FUNC__ "MatSolve_SeqAIJ" 643 int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx) 644 { 645 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 646 IS iscol = a->col, isrow = a->row; 647 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 648 int nz,shift = a->indexshift,*rout,*cout; 649 Scalar *x,*b,*tmp, *tmps, *aa = a->a, sum, *v; 650 651 PetscFunctionBegin; 652 if (!n) PetscFunctionReturn(0); 653 654 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 655 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 656 tmp = a->solve_work; 657 658 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 659 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 660 661 /* forward solve the lower triangular */ 662 tmp[0] = b[*r++]; 663 tmps = tmp + shift; 664 for ( i=1; i<n; i++ ) { 665 v = aa + ai[i] + shift; 666 vi = aj + ai[i] + shift; 667 nz = a->diag[i] - ai[i]; 668 sum = b[*r++]; 669 while (nz--) sum -= *v++ * tmps[*vi++]; 670 tmp[i] = sum; 671 } 672 673 /* backward solve the upper triangular */ 674 for ( i=n-1; i>=0; i-- ){ 675 v = aa + a->diag[i] + (!shift); 676 vi = aj + a->diag[i] + (!shift); 677 nz = ai[i+1] - a->diag[i] - 1; 678 sum = tmp[i]; 679 while (nz--) sum -= *v++ * tmps[*vi++]; 680 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 681 } 682 683 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 684 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 685 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 686 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 687 PLogFlops(2*a->nz - a->n); 688 PetscFunctionReturn(0); 689 } 690 691 /* ----------------------------------------------------------- */ 692 #undef __FUNC__ 693 #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering" 694 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb, Vec xx) 695 { 696 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 697 int n = a->m, *ai = a->i, *aj = a->j, *adiag = a->diag,ierr; 698 Scalar *x,*b, *aa = a->a, sum; 699 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 700 int adiag_i,i,*vi,nz,ai_i; 701 Scalar *v; 702 #endif 703 704 PetscFunctionBegin; 705 if (!n) PetscFunctionReturn(0); 706 if (a->indexshift) { 707 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 708 PetscFunctionReturn(0); 709 } 710 711 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 712 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 713 714 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 715 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 716 #else 717 /* forward solve the lower triangular */ 718 x[0] = b[0]; 719 for ( i=1; i<n; i++ ) { 720 ai_i = ai[i]; 721 v = aa + ai_i; 722 vi = aj + ai_i; 723 nz = adiag[i] - ai_i; 724 sum = b[i]; 725 while (nz--) sum -= *v++ * x[*vi++]; 726 x[i] = sum; 727 } 728 729 /* backward solve the upper triangular */ 730 for ( i=n-1; i>=0; i-- ){ 731 adiag_i = adiag[i]; 732 v = aa + adiag_i + 1; 733 vi = aj + adiag_i + 1; 734 nz = ai[i+1] - adiag_i - 1; 735 sum = x[i]; 736 while (nz--) sum -= *v++ * x[*vi++]; 737 x[i] = sum*aa[adiag_i]; 738 } 739 #endif 740 PLogFlops(2*a->nz - a->n); 741 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 742 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 743 PetscFunctionReturn(0); 744 } 745 746 #undef __FUNC__ 747 #define __FUNC__ "MatSolveAdd_SeqAIJ" 748 int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx) 749 { 750 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 751 IS iscol = a->col, isrow = a->row; 752 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 753 int nz, shift = a->indexshift,*rout,*cout; 754 Scalar *x,*b,*tmp, *aa = a->a, sum, *v; 755 756 PetscFunctionBegin; 757 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 758 759 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 760 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 761 tmp = a->solve_work; 762 763 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 764 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 765 766 /* forward solve the lower triangular */ 767 tmp[0] = b[*r++]; 768 for ( i=1; i<n; i++ ) { 769 v = aa + ai[i] + shift; 770 vi = aj + ai[i] + shift; 771 nz = a->diag[i] - ai[i]; 772 sum = b[*r++]; 773 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 774 tmp[i] = sum; 775 } 776 777 /* backward solve the upper triangular */ 778 for ( i=n-1; i>=0; i-- ){ 779 v = aa + a->diag[i] + (!shift); 780 vi = aj + a->diag[i] + (!shift); 781 nz = ai[i+1] - a->diag[i] - 1; 782 sum = tmp[i]; 783 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 784 tmp[i] = sum*aa[a->diag[i]+shift]; 785 x[*c--] += tmp[i]; 786 } 787 788 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 789 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 790 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 791 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 792 PLogFlops(2*a->nz); 793 794 PetscFunctionReturn(0); 795 } 796 /* -------------------------------------------------------------------*/ 797 #undef __FUNC__ 798 #define __FUNC__ "MatSolveTranspose_SeqAIJ" 799 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb, Vec xx) 800 { 801 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 802 IS iscol = a->col, isrow = a->row; 803 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 804 int nz,shift = a->indexshift,*rout,*cout, *diag = a->diag; 805 Scalar *x,*b,*tmp, *aa = a->a, *v, s1; 806 807 PetscFunctionBegin; 808 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 809 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 810 tmp = a->solve_work; 811 812 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 813 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 814 815 /* copy the b into temp work space according to permutation */ 816 for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 817 818 /* forward solve the U^T */ 819 for ( i=0; i<n; i++ ) { 820 v = aa + diag[i] + shift; 821 vi = aj + diag[i] + (!shift); 822 nz = ai[i+1] - diag[i] - 1; 823 s1 = tmp[i]; 824 s1 *= *(v++); /* multiply by inverse of diagonal entry */ 825 while (nz--) { 826 tmp[*vi++ + shift] -= (*v++)*s1; 827 } 828 tmp[i] = s1; 829 } 830 831 /* backward solve the L^T */ 832 for ( i=n-1; i>=0; i-- ){ 833 v = aa + diag[i] - 1 + shift; 834 vi = aj + diag[i] - 1 + shift; 835 nz = diag[i] - ai[i]; 836 s1 = tmp[i]; 837 while (nz--) { 838 tmp[*vi-- + shift] -= (*v--)*s1; 839 } 840 } 841 842 /* copy tmp into x according to permutation */ 843 for ( i=0; i<n; i++ ) x[r[i]] = tmp[i]; 844 845 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 846 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 847 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 848 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 849 850 PLogFlops(2*a->nz-a->n); 851 PetscFunctionReturn(0); 852 } 853 854 #undef __FUNC__ 855 #define __FUNC__ "MatSolveTransposeAdd_SeqAIJ" 856 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx) 857 { 858 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 859 IS iscol = a->col, isrow = a->row; 860 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 861 int nz,shift = a->indexshift, *rout, *cout, *diag = a->diag; 862 Scalar *x,*b,*tmp, *aa = a->a, *v; 863 864 PetscFunctionBegin; 865 if (zz != xx) VecCopy(zz,xx); 866 867 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 868 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 869 tmp = a->solve_work; 870 871 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 872 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 873 874 /* copy the b into temp work space according to permutation */ 875 for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 876 877 /* forward solve the U^T */ 878 for ( i=0; i<n; i++ ) { 879 v = aa + diag[i] + shift; 880 vi = aj + diag[i] + (!shift); 881 nz = ai[i+1] - diag[i] - 1; 882 tmp[i] *= *v++; 883 while (nz--) { 884 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 885 } 886 } 887 888 /* backward solve the L^T */ 889 for ( i=n-1; i>=0; i-- ){ 890 v = aa + diag[i] - 1 + shift; 891 vi = aj + diag[i] - 1 + shift; 892 nz = diag[i] - ai[i]; 893 while (nz--) { 894 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 895 } 896 } 897 898 /* copy tmp into x according to permutation */ 899 for ( i=0; i<n; i++ ) x[r[i]] += tmp[i]; 900 901 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 902 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 903 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 904 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 905 906 PLogFlops(2*a->nz); 907 PetscFunctionReturn(0); 908 } 909 /* ----------------------------------------------------------------*/ 910 extern int MatMissingDiagonal_SeqAIJ(Mat); 911 912 #undef __FUNC__ 913 #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ" 914 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 915 { 916 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 917 IS isicol; 918 int *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j; 919 int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 920 int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0, dcount = 0; 921 int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 922 PetscTruth col_identity, row_identity; 923 double f; 924 925 PetscFunctionBegin; 926 if (info) { 927 f = info->fill; 928 levels = (int) info->levels; 929 diagonal_fill = (int) info->diagonal_fill; 930 } else { 931 f = 1.0; 932 levels = 0; 933 diagonal_fill = 0; 934 } 935 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 936 937 /* special case that simply copies fill pattern */ 938 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 939 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 940 if (!levels && row_identity && col_identity) { 941 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 942 (*fact)->factor = FACTOR_LU; 943 b = (Mat_SeqAIJ *) (*fact)->data; 944 if (!b->diag) { 945 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 946 } 947 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 948 b->row = isrow; 949 b->col = iscol; 950 b->icol = isicol; 951 b->solve_work = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 952 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 953 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 954 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 955 PetscFunctionReturn(0); 956 } 957 958 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 959 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 960 961 /* get new row pointers */ 962 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 963 ainew[0] = -shift; 964 /* don't know how many column pointers are needed so estimate */ 965 jmax = (int) (f*(ai[n]+!shift)); 966 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 967 /* ajfill is level of fill for each fill entry */ 968 ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill); 969 /* fill is a linked list of nonzeros in active row */ 970 fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill); 971 /* im is level for each filled value */ 972 im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im); 973 /* dloc is location of diagonal in factor */ 974 dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc); 975 dloc[0] = 0; 976 for ( prow=0; prow<n; prow++ ) { 977 978 /* copy current row into linked list */ 979 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 980 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 981 xi = aj + ai[r[prow]] + shift; 982 fill[n] = n; 983 fill[prow] = -1; /* marker to indicate if diagonal exists */ 984 while (nz--) { 985 fm = n; 986 idx = ic[*xi++ + shift]; 987 do { 988 m = fm; 989 fm = fill[m]; 990 } while (fm < idx); 991 fill[m] = idx; 992 fill[idx] = fm; 993 im[idx] = 0; 994 } 995 996 /* make sure diagonal entry is included */ 997 if (diagonal_fill && fill[prow] == -1) { 998 fm = n; 999 while (fill[fm] < prow) fm = fill[fm]; 1000 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 1001 fill[fm] = prow; 1002 im[prow] = 0; 1003 nzf++; 1004 dcount++; 1005 } 1006 1007 nzi = 0; 1008 row = fill[n]; 1009 while ( row < prow ) { 1010 incrlev = im[row] + 1; 1011 nz = dloc[row]; 1012 xi = ajnew + ainew[row] + shift + nz + 1; 1013 flev = ajfill + ainew[row] + shift + nz + 1; 1014 nnz = ainew[row+1] - ainew[row] - nz - 1; 1015 fm = row; 1016 while (nnz-- > 0) { 1017 idx = *xi++ + shift; 1018 if (*flev + incrlev > levels) { 1019 flev++; 1020 continue; 1021 } 1022 do { 1023 m = fm; 1024 fm = fill[m]; 1025 } while (fm < idx); 1026 if (fm != idx) { 1027 im[idx] = *flev + incrlev; 1028 fill[m] = idx; 1029 fill[idx] = fm; 1030 fm = idx; 1031 nzf++; 1032 } else { 1033 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 1034 } 1035 flev++; 1036 } 1037 row = fill[row]; 1038 nzi++; 1039 } 1040 /* copy new filled row into permanent storage */ 1041 ainew[prow+1] = ainew[prow] + nzf; 1042 if (ainew[prow+1] > jmax-shift) { 1043 1044 /* estimate how much additional space we will need */ 1045 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1046 /* just double the memory each time */ 1047 /* maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1048 int maxadd = jmax; 1049 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1050 jmax += maxadd; 1051 1052 /* allocate a longer ajnew and ajfill */ 1053 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1054 ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1055 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1056 ajnew = xi; 1057 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1058 ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1059 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1060 ajfill = xi; 1061 realloc++; /* count how many times we realloc */ 1062 } 1063 xi = ajnew + ainew[prow] + shift; 1064 flev = ajfill + ainew[prow] + shift; 1065 dloc[prow] = nzi; 1066 fm = fill[n]; 1067 while (nzf--) { 1068 *xi++ = fm - shift; 1069 *flev++ = im[fm]; 1070 fm = fill[fm]; 1071 } 1072 /* make sure row has diagonal entry */ 1073 if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 1074 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 1075 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1076 } 1077 } 1078 ierr = PetscFree(ajfill); CHKERRQ(ierr); 1079 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1080 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1081 ierr = PetscFree(fill);CHKERRQ(ierr); 1082 ierr = PetscFree(im);CHKERRQ(ierr); 1083 1084 { 1085 double af = ((double)ainew[n])/((double)ai[n]); 1086 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1087 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 1088 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 1089 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1090 if (diagonal_fill) { 1091 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount); 1092 } 1093 } 1094 1095 /* put together the new matrix */ 1096 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1097 PLogObjectParent(*fact,isicol); 1098 b = (Mat_SeqAIJ *) (*fact)->data; 1099 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1100 b->singlemalloc = PETSC_FALSE; 1101 len = (ainew[n] + shift)*sizeof(Scalar); 1102 /* the next line frees the default space generated by the Create() */ 1103 ierr = PetscFree(b->a);CHKERRQ(ierr); 1104 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1105 b->a = (Scalar *) PetscMalloc( len+1 );CHKPTRQ(b->a); 1106 b->j = ajnew; 1107 b->i = ainew; 1108 for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 1109 b->diag = dloc; 1110 b->ilen = 0; 1111 b->imax = 0; 1112 b->row = isrow; 1113 b->col = iscol; 1114 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1115 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1116 b->icol = isicol; 1117 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1118 /* In b structure: Free imax, ilen, old a, old j. 1119 Allocate dloc, solve_work, new a, new j */ 1120 PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 1121 b->maxnz = b->nz = ainew[n] + shift; 1122 (*fact)->factor = FACTOR_LU; 1123 1124 (*fact)->info.factor_mallocs = realloc; 1125 (*fact)->info.fill_ratio_given = f; 1126 (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]); 1127 (*fact)->factor = FACTOR_LU;; 1128 1129 PetscFunctionReturn(0); 1130 } 1131 1132 1133 1134 1135