1 /*$Id: aijfact.c,v 1.131 1999/12/16 23:06:22 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 7 #undef __FUNC__ 8 #define __FUNC__ "MatOrdering_Flow_SeqAIJ" 9 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 10 { 11 PetscFunctionBegin; 12 13 SETERRQ(PETSC_ERR_SUP,0,"Code not written"); 14 #if !defined(PETSC_USE_DEBUG) 15 PetscFunctionReturn(0); 16 #endif 17 } 18 19 20 extern int MatMarkDiagonal_SeqAIJ(Mat); 21 extern int Mat_AIJ_CheckInode(Mat); 22 23 #if !defined(PETSC_USE_COMPLEX) && defined(PETSC_HAVE_SAADILUDT) 24 25 #if defined(PETSC_HAVE_FORTRAN_CAPS) 26 #define dperm_ DPERM 27 #define ilutp_ ILUTP 28 #define ilu0_ ILU0 29 #define msrcsr_ MSRCSR 30 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 31 #define dperm_ dperm 32 #define ilutp_ ilutp 33 #define ilu0_ ilu0 34 #define msrcsr_ msrcsr 35 #endif 36 37 EXTERN_C_BEGIN 38 extern void dperm_(int*,double*,int*,int*,double*,int*,int*,int*,int*,int*); 39 extern void ilutp_(int*,double*,int*,int*,int*,double*,double*,int*,double*,int*,int*,int*,double*,int*,int*,int*); 40 extern void msrcsr_(int*,double*,int*,double*,int*,int*,double*,int*); 41 extern void ilu0_(int*,double*,int*,int*,double*,int*,int*,int*,int *); 42 EXTERN_C_END 43 44 #undef __FUNC__ 45 #define __FUNC__ "MatILUDTFactor_SeqAIJ" 46 /* ------------------------------------------------------------ 47 48 This interface was contribed by Tony Caola 49 50 This routine is an interface to the pivoting drop-tolerance 51 ILU routine written by Yousef Saad (saad@cs.umn.edu). It 52 was inspired by the non-pivoting iludt written by David 53 Hysom (hysom@cs.odu.edu). 54 55 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 56 help in getting this routine ironed out. 57 58 As of right now, there are a couple of things that could 59 be, uh, better. 60 61 1 - Since Saad's routine is Fortran based, memory cannot be 62 malloc'd. I was trying to get the expected fill from the 63 preconditioner and use this number as the multiplier in 64 the equation for jmax, below, but couldn't figure it out. 65 Anyway, perhaps a better solution is to run SPARSKIT through 66 f2c and incorporate mallocs(), but I want to graduate so I'll 67 just rebuild Petsc. . . 68 69 shift = 1, ishift = 0, for indices start at 1 70 shift = 0, ishift = 1, for indices starting at 0 71 ------------------------------------------------------------ 72 */ 73 74 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 75 { 76 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 77 IS iscolf, isrowf, isicol, isirow; 78 PetscTruth reorder; 79 int *c,*r,*ic,*ir, ierr, i, n = a->m; 80 int *old_i = a->i, *old_j = a->j, *new_i, *old_i2, *old_j2,*new_j; 81 int *ordcol, *ordrow,*iwk,*iperm, *jw; 82 int ishift = !a->indexshift,shift = -a->indexshift; 83 int jmax,lfill,job,*o_i,*o_j; 84 Scalar *old_a = a->a, *w, *new_a, *old_a2, *wk,permtol=0.0,*o_a; 85 86 PetscFunctionBegin; 87 88 if (info->dt == PETSC_DEFAULT) info->dt = .005; 89 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int) (1.5*a->rmax); 90 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 91 if (info->fill == PETSC_DEFAULT) info->fill = (n*info->dtcount)/a->nz; 92 lfill = info->dtcount/2; 93 jmax = info->fill*a->nz; 94 permtol = info->dtcol; 95 96 ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 97 ierr = ISInvertPermutation(isrow,&isirow); CHKERRQ(ierr); 98 99 100 /* ------------------------------------------------------------ 101 If reorder=.TRUE., then the original matrix has to be 102 reordered to reflect the user selected ordering scheme, and 103 then de-reordered so it is in it's original format. 104 Because Saad's dperm() is NOT in place, we have to copy 105 the original matrix and allocate more storage. . . 106 ------------------------------------------------------------ 107 */ 108 109 /* set reorder to true if either isrow or iscol is not identity */ 110 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 111 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 112 reorder = PetscNot(reorder); 113 114 115 /* storage for ilu factor */ 116 new_i = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(new_i); 117 new_j = (int *) PetscMalloc(jmax*sizeof(int)); CHKPTRQ(new_j); 118 new_a = (Scalar *) PetscMalloc(jmax*sizeof(Scalar)); CHKPTRQ(new_a); 119 120 if (reorder) { 121 old_i2 = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(old_i2); 122 old_j2 = (int *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int)); CHKPTRQ(old_j2); 123 old_a2 = (Scalar *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar));CHKPTRQ(old_a2); 124 } 125 126 ordcol = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordcol); 127 ordrow = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordrow); 128 129 /* ------------------------------------------------------------ 130 Make sure that everything is Fortran formatted (1-Based) 131 ------------------------------------------------------------ 132 */ 133 if (ishift) { 134 for (i=old_i[0];i<old_i[n];i++) { 135 old_j[i]++; 136 } 137 for(i=0;i<n+1;i++) { 138 old_i[i]++; 139 }; 140 }; 141 142 143 144 if (reorder) { 145 job = 3; 146 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 147 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 148 for(i=0;i<n;i++) { 149 r[i] = r[i]+1; 150 c[i] = c[i]+1; 151 } 152 dperm_(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 153 for (i=0;i<n;i++) { 154 r[i] = r[i]-1; 155 c[i] = c[i]-1; 156 } 157 ierr = ISRestoreIndices(iscol,&c); CHKERRQ(ierr); 158 ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 159 o_a = old_a2; 160 o_j = old_j2; 161 o_i = old_i2; 162 } else { 163 o_a = old_a; 164 o_j = old_j; 165 o_i = old_i; 166 } 167 168 /* ------------------------------------------------------------ 169 Call Saad's ilutp() routine to generate the factorization 170 ------------------------------------------------------------ 171 */ 172 173 iperm = (int *) PetscMalloc(2*n*sizeof(int)); CHKPTRQ(iperm); 174 jw = (int *) PetscMalloc(2*n*sizeof(int)); CHKPTRQ(jw); 175 w = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(w); 176 177 ilutp_(&n,o_a,o_j,o_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 178 if (ierr) { 179 switch (ierr) { 180 case -3: SETERRQ1(1,1,"ilutp(), matrix U overflows, need larger info->fill value %d",jmax); 181 break; 182 case -2: SETERRQ1(1,1,"ilutp(), matrix L overflows, need larger info->fill value %d",jmax); 183 break; 184 case -5: SETERRQ(1,1,"ilutp(), zero row encountered"); 185 break; 186 case -1: SETERRQ(1,1,"ilutp(), input matrix may be wrong"); 187 break; 188 case -4: SETERRQ1(1,1,"ilutp(), illegal info->fill value %d",jmax); 189 break; 190 default: SETERRQ1(1,1,"ilutp(), zero pivot detected on row %d",ierr); 191 } 192 } 193 194 ierr = PetscFree(w);CHKERRQ(ierr); 195 ierr = PetscFree(jw);CHKERRQ(ierr); 196 197 198 /* ------------------------------------------------------------ 199 Saad's routine gives the result in Modified Sparse Row (msr) 200 Convert to Compressed Sparse Row format (csr) 201 ------------------------------------------------------------ 202 */ 203 204 wk = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(wk); 205 iwk = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(iwk); 206 207 msrcsr_(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 208 209 ierr = PetscFree(iwk);CHKERRQ(ierr); 210 ierr = PetscFree(wk);CHKERRQ(ierr); 211 212 if (reorder) { 213 ierr = PetscFree(old_a2);CHKERRQ(ierr); 214 ierr = PetscFree(old_j2);CHKERRQ(ierr); 215 ierr = PetscFree(old_i2);CHKERRQ(ierr); 216 } 217 218 /* fix permutation of old_j that the factorization introduced */ 219 if (!reorder) { 220 for (i=old_i[0]; i<=old_i[n]; i++) { 221 old_j[i-1] = iperm[old_j[i-1]-1]; 222 } 223 } 224 225 /* get rid of the shift to indices starting at 1 */ 226 if (ishift) { 227 for (i=0; i<n+1; i++) { 228 old_i[i]--; 229 } 230 for (i=old_i[0];i<old_i[n];i++) { 231 old_j[i]--; 232 } 233 } 234 235 /* Make the factored matrix 0-based */ 236 if (ishift) { 237 for (i=0; i<n+1; i++) { 238 new_i[i]--; 239 } 240 for (i=new_i[0];i<new_i[n];i++) { 241 new_j[i]--; 242 } 243 } 244 245 246 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 247 /*-- permute the right-hand-side and solution vectors --*/ 248 ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 249 ierr = ISGetIndices(isirow,&ir); CHKERRQ(ierr); 250 for(i=0; i<n; i++) { 251 ordcol[i] = ic[iperm[i]-1]; 252 ordrow[i] = ir[i]; 253 }; 254 ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 255 ierr = ISRestoreIndices(isirow,&ir); CHKERRQ(ierr); 256 257 ierr = PetscFree(iperm);CHKERRQ(ierr); 258 259 ierr = ISCreateGeneral(PETSC_COMM_SELF, n, ordcol, &iscolf); 260 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordrow,&isrowf); 261 262 /*----- put together the new matrix -----*/ 263 264 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact); CHKERRQ(ierr); 265 (*fact)->factor = FACTOR_LU; 266 (*fact)->assembled = PETSC_TRUE; 267 268 b = (Mat_SeqAIJ *) (*fact)->data; 269 ierr = PetscFree(b->imax);CHKERRQ(ierr); 270 b->sorted = PETSC_FALSE; 271 b->singlemalloc = PETSC_TRUE; 272 /* the next line frees the default space generated by the MatCreate() */ 273 ierr = PetscFree(b->a);CHKERRQ(ierr); 274 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 275 b->a = new_a; 276 b->j = new_j; 277 b->i = new_i; 278 b->ilen = 0; 279 b->imax = 0; 280 b->row = isrowf; 281 b->col = iscolf; 282 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 283 b->maxnz = b->nz = new_i[n]; 284 b->indexshift = a->indexshift; 285 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 286 (*fact)->info.factor_mallocs = 0; 287 288 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 289 290 /* check out for identical nodes. If found, use inode functions */ 291 ierr = Mat_AIJ_CheckInode(*fact);CHKERRQ(ierr); 292 293 PetscFunctionReturn(0); 294 } 295 #else 296 #undef __FUNC__ 297 #define __FUNC__ "MatILUDTFactor_SeqAIJ" 298 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 299 { 300 PetscFunctionBegin; 301 SETERRQ(1,1,"You must install Saad's ILUDT to use this"); 302 } 303 #endif 304 305 /* 306 Factorization code for AIJ format. 307 */ 308 #undef __FUNC__ 309 #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ" 310 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B) 311 { 312 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 313 IS isicol; 314 int *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j; 315 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift; 316 int *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im; 317 318 PetscFunctionBegin; 319 PetscValidHeaderSpecific(isrow,IS_COOKIE); 320 PetscValidHeaderSpecific(iscol,IS_COOKIE); 321 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square"); 322 323 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 324 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 325 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 326 327 /* get new row pointers */ 328 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 329 ainew[0] = -shift; 330 /* don't know how many column pointers are needed so estimate */ 331 jmax = (int) (f*ai[n]+(!shift)); 332 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 333 /* fill is a linked list of nonzeros in active row */ 334 fill = (int *) PetscMalloc( (2*n+1)*sizeof(int));CHKPTRQ(fill); 335 im = fill + n + 1; 336 /* idnew is location of diagonal in factor */ 337 idnew = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(idnew); 338 idnew[0] = -shift; 339 340 for ( i=0; i<n; i++ ) { 341 /* first copy previous fill into linked list */ 342 nnz = nz = ai[r[i]+1] - ai[r[i]]; 343 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 344 ajtmp = aj + ai[r[i]] + shift; 345 fill[n] = n; 346 while (nz--) { 347 fm = n; 348 idx = ic[*ajtmp++ + shift]; 349 do { 350 m = fm; 351 fm = fill[m]; 352 } while (fm < idx); 353 fill[m] = idx; 354 fill[idx] = fm; 355 } 356 row = fill[n]; 357 while ( row < i ) { 358 ajtmp = ajnew + idnew[row] + (!shift); 359 nzbd = 1 + idnew[row] - ainew[row]; 360 nz = im[row] - nzbd; 361 fm = row; 362 while (nz-- > 0) { 363 idx = *ajtmp++ + shift; 364 nzbd++; 365 if (idx == i) im[row] = nzbd; 366 do { 367 m = fm; 368 fm = fill[m]; 369 } while (fm < idx); 370 if (fm != idx) { 371 fill[m] = idx; 372 fill[idx] = fm; 373 fm = idx; 374 nnz++; 375 } 376 } 377 row = fill[row]; 378 } 379 /* copy new filled row into permanent storage */ 380 ainew[i+1] = ainew[i] + nnz; 381 if (ainew[i+1] > jmax) { 382 383 /* estimate how much additional space we will need */ 384 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 385 /* just double the memory each time */ 386 int maxadd = jmax; 387 /* maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n); */ 388 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 389 jmax += maxadd; 390 391 /* allocate a longer ajnew */ 392 ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp); 393 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 394 ierr = PetscFree(ajnew);CHKERRQ(ierr); 395 ajnew = ajtmp; 396 realloc++; /* count how many times we realloc */ 397 } 398 ajtmp = ajnew + ainew[i] + shift; 399 fm = fill[n]; 400 nzi = 0; 401 im[i] = nnz; 402 while (nnz--) { 403 if (fm < i) nzi++; 404 *ajtmp++ = fm - shift; 405 fm = fill[fm]; 406 } 407 idnew[i] = ainew[i] + nzi; 408 } 409 if (ai[n] != 0) { 410 double af = ((double)ainew[n])/((double)ai[n]); 411 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 412 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 413 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 414 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 415 } else { 416 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 417 } 418 419 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 420 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 421 422 ierr = PetscFree(fill);CHKERRQ(ierr); 423 424 /* put together the new matrix */ 425 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 426 PLogObjectParent(*B,isicol); 427 b = (Mat_SeqAIJ *) (*B)->data; 428 ierr = PetscFree(b->imax);CHKERRQ(ierr); 429 b->singlemalloc = PETSC_FALSE; 430 /* the next line frees the default space generated by the Create() */ 431 ierr = PetscFree(b->a);CHKERRQ(ierr); 432 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 433 b->a = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a); 434 b->j = ajnew; 435 b->i = ainew; 436 b->diag = idnew; 437 b->ilen = 0; 438 b->imax = 0; 439 b->row = isrow; 440 b->col = iscol; 441 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 442 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 443 b->icol = isicol; 444 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 445 /* In b structure: Free imax, ilen, old a, old j. 446 Allocate idnew, solve_work, new a, new j */ 447 PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 448 b->maxnz = b->nz = ainew[n] + shift; 449 450 (*B)->factor = FACTOR_LU;; 451 (*B)->info.factor_mallocs = realloc; 452 (*B)->info.fill_ratio_given = f; 453 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant if A has inodes */ 454 455 if (ai[n] != 0) { 456 (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[n]); 457 } else { 458 (*B)->info.fill_ratio_needed = 0.0; 459 } 460 PetscFunctionReturn(0); 461 } 462 /* ----------------------------------------------------------- */ 463 extern int Mat_AIJ_CheckInode(Mat); 464 465 #undef __FUNC__ 466 #define __FUNC__ "MatLUFactorNumeric_SeqAIJ" 467 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 468 { 469 Mat C = *B; 470 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data; 471 IS isrow = b->row, isicol = b->icol; 472 int *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j; 473 int *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift; 474 int *diag_offset = b->diag,diag,k; 475 int preserve_row_sums = (int) a->ilu_preserve_row_sums; 476 register int *pj; 477 Scalar *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0; 478 double ssum; 479 register Scalar *pv, *rtmps,*u_values; 480 481 PetscFunctionBegin; 482 483 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 484 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 485 rtmp = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) );CHKPTRQ(rtmp); 486 ierr = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr); 487 rtmps = rtmp + shift; ics = ic + shift; 488 489 /* precalculate row sums */ 490 if (preserve_row_sums) { 491 rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) );CHKPTRQ(rowsums); 492 for ( i=0; i<n; i++ ) { 493 nz = a->i[r[i]+1] - a->i[r[i]]; 494 v = a->a + a->i[r[i]] + shift; 495 sum = 0.0; 496 for ( j=0; j<nz; j++ ) sum += v[j]; 497 rowsums[i] = sum; 498 } 499 } 500 501 for ( i=0; i<n; i++ ) { 502 nz = ai[i+1] - ai[i]; 503 ajtmp = aj + ai[i] + shift; 504 for ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0; 505 506 /* load in initial (unfactored row) */ 507 nz = a->i[r[i]+1] - a->i[r[i]]; 508 ajtmpold = a->j + a->i[r[i]] + shift; 509 v = a->a + a->i[r[i]] + shift; 510 for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] = v[j]; 511 512 row = *ajtmp++ + shift; 513 while (row < i ) { 514 pc = rtmp + row; 515 if (*pc != 0.0) { 516 pv = b->a + diag_offset[row] + shift; 517 pj = b->j + diag_offset[row] + (!shift); 518 multiplier = *pc / *pv++; 519 *pc = multiplier; 520 nz = ai[row+1] - diag_offset[row] - 1; 521 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 522 PLogFlops(2*nz); 523 } 524 row = *ajtmp++ + shift; 525 } 526 /* finished row so stick it into b->a */ 527 pv = b->a + ai[i] + shift; 528 pj = b->j + ai[i] + shift; 529 nz = ai[i+1] - ai[i]; 530 for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];} 531 diag = diag_offset[i] - ai[i]; 532 /* 533 Possibly adjust diagonal entry on current row to force 534 LU matrix to have same row sum as initial matrix. 535 */ 536 if (pv[diag] == 0.0) { 537 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i); 538 } 539 if (preserve_row_sums) { 540 pj = b->j + ai[i] + shift; 541 sum = rowsums[i]; 542 for ( j=0; j<diag; j++ ) { 543 u_values = b->a + diag_offset[pj[j]] + shift; 544 nz = ai[pj[j]+1] - diag_offset[pj[j]]; 545 inner_sum = 0.0; 546 for ( k=0; k<nz; k++ ) { 547 inner_sum += u_values[k]; 548 } 549 sum -= pv[j]*inner_sum; 550 551 } 552 nz = ai[i+1] - diag_offset[i] - 1; 553 u_values = b->a + diag_offset[i] + 1 + shift; 554 for ( k=0; k<nz; k++ ) { 555 sum -= u_values[k]; 556 } 557 ssum = PetscAbsScalar(sum/pv[diag]); 558 if (ssum < 1000. && ssum > .001) pv[diag] = sum; 559 } 560 /* check pivot entry for current row */ 561 } 562 563 /* invert diagonal entries for simplier triangular solves */ 564 for ( i=0; i<n; i++ ) { 565 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 566 } 567 568 if (preserve_row_sums) {ierr = PetscFree(rowsums);CHKERRQ(ierr);} 569 ierr = PetscFree(rtmp);CHKERRQ(ierr); 570 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 571 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 572 C->factor = FACTOR_LU; 573 ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr); 574 C->assembled = PETSC_TRUE; 575 PLogFlops(b->n); 576 PetscFunctionReturn(0); 577 } 578 /* ----------------------------------------------------------- */ 579 #undef __FUNC__ 580 #define __FUNC__ "MatLUFactor_SeqAIJ" 581 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f) 582 { 583 Mat_SeqAIJ *mat = (Mat_SeqAIJ *) A->data; 584 int ierr,refct; 585 Mat C; 586 PetscOps *Abops; 587 MatOps Aops; 588 589 PetscFunctionBegin; 590 ierr = MatLUFactorSymbolic(A,row,col,f,&C);CHKERRQ(ierr); 591 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 592 593 /* free all the data structures from mat */ 594 ierr = PetscFree(mat->a);CHKERRQ(ierr); 595 if (!mat->singlemalloc) { 596 ierr = PetscFree(mat->i);CHKERRQ(ierr); 597 ierr = PetscFree(mat->j);CHKERRQ(ierr); 598 } 599 if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);} 600 if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);} 601 if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);} 602 if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);} 603 if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);} 604 if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);} 605 ierr = PetscFree(mat);CHKERRQ(ierr); 606 607 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 608 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 609 610 /* 611 This is horrible, horrible code. We need to keep the 612 A pointers for the bops and ops but copy everything 613 else from C. 614 */ 615 Abops = A->bops; 616 Aops = A->ops; 617 refct = A->refct; 618 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 619 mat = (Mat_SeqAIJ *) A->data; 620 PLogObjectParent(A,mat->icol); 621 622 A->bops = Abops; 623 A->ops = Aops; 624 A->qlist = 0; 625 A->refct = refct; 626 /* copy over the type_name and name */ 627 ierr = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr); 628 ierr = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr); 629 630 PetscHeaderDestroy(C); 631 632 PetscFunctionReturn(0); 633 } 634 /* ----------------------------------------------------------- */ 635 #undef __FUNC__ 636 #define __FUNC__ "MatSolve_SeqAIJ" 637 int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx) 638 { 639 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 640 IS iscol = a->col, isrow = a->row; 641 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 642 int nz,shift = a->indexshift,*rout,*cout; 643 Scalar *x,*b,*tmp, *tmps, *aa = a->a, sum, *v; 644 645 PetscFunctionBegin; 646 if (!n) PetscFunctionReturn(0); 647 648 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 649 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 650 tmp = a->solve_work; 651 652 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 653 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 654 655 /* forward solve the lower triangular */ 656 tmp[0] = b[*r++]; 657 tmps = tmp + shift; 658 for ( i=1; i<n; i++ ) { 659 v = aa + ai[i] + shift; 660 vi = aj + ai[i] + shift; 661 nz = a->diag[i] - ai[i]; 662 sum = b[*r++]; 663 while (nz--) sum -= *v++ * tmps[*vi++]; 664 tmp[i] = sum; 665 } 666 667 /* backward solve the upper triangular */ 668 for ( i=n-1; i>=0; i-- ){ 669 v = aa + a->diag[i] + (!shift); 670 vi = aj + a->diag[i] + (!shift); 671 nz = ai[i+1] - a->diag[i] - 1; 672 sum = tmp[i]; 673 while (nz--) sum -= *v++ * tmps[*vi++]; 674 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 675 } 676 677 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 678 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 679 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 680 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 681 PLogFlops(2*a->nz - a->n); 682 PetscFunctionReturn(0); 683 } 684 685 /* ----------------------------------------------------------- */ 686 #undef __FUNC__ 687 #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering" 688 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb, Vec xx) 689 { 690 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 691 int n = a->m, *ai = a->i, *aj = a->j, *adiag = a->diag,ierr; 692 Scalar *x,*b, *aa = a->a, sum; 693 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 694 int adiag_i,i,*vi,nz,ai_i; 695 Scalar *v; 696 #endif 697 698 PetscFunctionBegin; 699 if (!n) PetscFunctionReturn(0); 700 if (a->indexshift) { 701 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 702 PetscFunctionReturn(0); 703 } 704 705 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 706 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 707 708 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 709 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 710 #else 711 /* forward solve the lower triangular */ 712 x[0] = b[0]; 713 for ( i=1; i<n; i++ ) { 714 ai_i = ai[i]; 715 v = aa + ai_i; 716 vi = aj + ai_i; 717 nz = adiag[i] - ai_i; 718 sum = b[i]; 719 while (nz--) sum -= *v++ * x[*vi++]; 720 x[i] = sum; 721 } 722 723 /* backward solve the upper triangular */ 724 for ( i=n-1; i>=0; i-- ){ 725 adiag_i = adiag[i]; 726 v = aa + adiag_i + 1; 727 vi = aj + adiag_i + 1; 728 nz = ai[i+1] - adiag_i - 1; 729 sum = x[i]; 730 while (nz--) sum -= *v++ * x[*vi++]; 731 x[i] = sum*aa[adiag_i]; 732 } 733 #endif 734 PLogFlops(2*a->nz - a->n); 735 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 736 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNC__ 741 #define __FUNC__ "MatSolveAdd_SeqAIJ" 742 int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx) 743 { 744 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 745 IS iscol = a->col, isrow = a->row; 746 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 747 int nz, shift = a->indexshift,*rout,*cout; 748 Scalar *x,*b,*tmp, *aa = a->a, sum, *v; 749 750 PetscFunctionBegin; 751 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 752 753 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 754 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 755 tmp = a->solve_work; 756 757 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 758 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 759 760 /* forward solve the lower triangular */ 761 tmp[0] = b[*r++]; 762 for ( i=1; i<n; i++ ) { 763 v = aa + ai[i] + shift; 764 vi = aj + ai[i] + shift; 765 nz = a->diag[i] - ai[i]; 766 sum = b[*r++]; 767 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 768 tmp[i] = sum; 769 } 770 771 /* backward solve the upper triangular */ 772 for ( i=n-1; i>=0; i-- ){ 773 v = aa + a->diag[i] + (!shift); 774 vi = aj + a->diag[i] + (!shift); 775 nz = ai[i+1] - a->diag[i] - 1; 776 sum = tmp[i]; 777 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 778 tmp[i] = sum*aa[a->diag[i]+shift]; 779 x[*c--] += tmp[i]; 780 } 781 782 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 783 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 784 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 785 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 786 PLogFlops(2*a->nz); 787 788 PetscFunctionReturn(0); 789 } 790 /* -------------------------------------------------------------------*/ 791 #undef __FUNC__ 792 #define __FUNC__ "MatSolveTranspose_SeqAIJ" 793 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb, Vec xx) 794 { 795 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 796 IS iscol = a->col, isrow = a->row; 797 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 798 int nz,shift = a->indexshift,*rout,*cout, *diag = a->diag; 799 Scalar *x,*b,*tmp, *aa = a->a, *v, s1; 800 801 PetscFunctionBegin; 802 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 803 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 804 tmp = a->solve_work; 805 806 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 807 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 808 809 /* copy the b into temp work space according to permutation */ 810 for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 811 812 /* forward solve the U^T */ 813 for ( i=0; i<n; i++ ) { 814 v = aa + diag[i] + shift; 815 vi = aj + diag[i] + (!shift); 816 nz = ai[i+1] - diag[i] - 1; 817 s1 = tmp[i]; 818 s1 *= *(v++); /* multiply by inverse of diagonal entry */ 819 while (nz--) { 820 tmp[*vi++ + shift] -= (*v++)*s1; 821 } 822 tmp[i] = s1; 823 } 824 825 /* backward solve the L^T */ 826 for ( i=n-1; i>=0; i-- ){ 827 v = aa + diag[i] - 1 + shift; 828 vi = aj + diag[i] - 1 + shift; 829 nz = diag[i] - ai[i]; 830 s1 = tmp[i]; 831 while (nz--) { 832 tmp[*vi-- + shift] -= (*v--)*s1; 833 } 834 } 835 836 /* copy tmp into x according to permutation */ 837 for ( i=0; i<n; i++ ) x[r[i]] = tmp[i]; 838 839 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 840 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 841 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 842 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 843 844 PLogFlops(2*a->nz-a->n); 845 PetscFunctionReturn(0); 846 } 847 848 #undef __FUNC__ 849 #define __FUNC__ "MatSolveTransposeAdd_SeqAIJ" 850 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx) 851 { 852 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 853 IS iscol = a->col, isrow = a->row; 854 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 855 int nz,shift = a->indexshift, *rout, *cout, *diag = a->diag; 856 Scalar *x,*b,*tmp, *aa = a->a, *v; 857 858 PetscFunctionBegin; 859 if (zz != xx) VecCopy(zz,xx); 860 861 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 862 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 863 tmp = a->solve_work; 864 865 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 866 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 867 868 /* copy the b into temp work space according to permutation */ 869 for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 870 871 /* forward solve the U^T */ 872 for ( i=0; i<n; i++ ) { 873 v = aa + diag[i] + shift; 874 vi = aj + diag[i] + (!shift); 875 nz = ai[i+1] - diag[i] - 1; 876 tmp[i] *= *v++; 877 while (nz--) { 878 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 879 } 880 } 881 882 /* backward solve the L^T */ 883 for ( i=n-1; i>=0; i-- ){ 884 v = aa + diag[i] - 1 + shift; 885 vi = aj + diag[i] - 1 + shift; 886 nz = diag[i] - ai[i]; 887 while (nz--) { 888 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 889 } 890 } 891 892 /* copy tmp into x according to permutation */ 893 for ( i=0; i<n; i++ ) x[r[i]] += tmp[i]; 894 895 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 896 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 897 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 898 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 899 900 PLogFlops(2*a->nz); 901 PetscFunctionReturn(0); 902 } 903 /* ----------------------------------------------------------------*/ 904 extern int MatMissingDiagonal_SeqAIJ(Mat); 905 906 #undef __FUNC__ 907 #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ" 908 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 909 { 910 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 911 IS isicol; 912 int *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j; 913 int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 914 int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0, dcount = 0; 915 int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 916 PetscTruth col_identity, row_identity; 917 double f; 918 919 PetscFunctionBegin; 920 if (info) { 921 f = info->fill; 922 levels = (int) info->levels; 923 diagonal_fill = (int) info->diagonal_fill; 924 } else { 925 f = 1.0; 926 levels = 0; 927 diagonal_fill = 0; 928 } 929 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 930 931 /* special case that simply copies fill pattern */ 932 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 933 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 934 if (!levels && row_identity && col_identity) { 935 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 936 (*fact)->factor = FACTOR_LU; 937 b = (Mat_SeqAIJ *) (*fact)->data; 938 if (!b->diag) { 939 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 940 } 941 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 942 b->row = isrow; 943 b->col = iscol; 944 b->icol = isicol; 945 b->solve_work = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 946 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 947 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 948 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 949 PetscFunctionReturn(0); 950 } 951 952 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 953 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 954 955 /* get new row pointers */ 956 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 957 ainew[0] = -shift; 958 /* don't know how many column pointers are needed so estimate */ 959 jmax = (int) (f*(ai[n]+!shift)); 960 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 961 /* ajfill is level of fill for each fill entry */ 962 ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill); 963 /* fill is a linked list of nonzeros in active row */ 964 fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill); 965 /* im is level for each filled value */ 966 im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im); 967 /* dloc is location of diagonal in factor */ 968 dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc); 969 dloc[0] = 0; 970 for ( prow=0; prow<n; prow++ ) { 971 972 /* copy current row into linked list */ 973 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 974 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 975 xi = aj + ai[r[prow]] + shift; 976 fill[n] = n; 977 fill[prow] = -1; /* marker to indicate if diagonal exists */ 978 while (nz--) { 979 fm = n; 980 idx = ic[*xi++ + shift]; 981 do { 982 m = fm; 983 fm = fill[m]; 984 } while (fm < idx); 985 fill[m] = idx; 986 fill[idx] = fm; 987 im[idx] = 0; 988 } 989 990 /* make sure diagonal entry is included */ 991 if (diagonal_fill && fill[prow] == -1) { 992 fm = n; 993 while (fill[fm] < prow) fm = fill[fm]; 994 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 995 fill[fm] = prow; 996 im[prow] = 0; 997 nzf++; 998 dcount++; 999 } 1000 1001 nzi = 0; 1002 row = fill[n]; 1003 while ( row < prow ) { 1004 incrlev = im[row] + 1; 1005 nz = dloc[row]; 1006 xi = ajnew + ainew[row] + shift + nz + 1; 1007 flev = ajfill + ainew[row] + shift + nz + 1; 1008 nnz = ainew[row+1] - ainew[row] - nz - 1; 1009 fm = row; 1010 while (nnz-- > 0) { 1011 idx = *xi++ + shift; 1012 if (*flev + incrlev > levels) { 1013 flev++; 1014 continue; 1015 } 1016 do { 1017 m = fm; 1018 fm = fill[m]; 1019 } while (fm < idx); 1020 if (fm != idx) { 1021 im[idx] = *flev + incrlev; 1022 fill[m] = idx; 1023 fill[idx] = fm; 1024 fm = idx; 1025 nzf++; 1026 } else { 1027 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 1028 } 1029 flev++; 1030 } 1031 row = fill[row]; 1032 nzi++; 1033 } 1034 /* copy new filled row into permanent storage */ 1035 ainew[prow+1] = ainew[prow] + nzf; 1036 if (ainew[prow+1] > jmax-shift) { 1037 1038 /* estimate how much additional space we will need */ 1039 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1040 /* just double the memory each time */ 1041 /* maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1042 int maxadd = jmax; 1043 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1044 jmax += maxadd; 1045 1046 /* allocate a longer ajnew and ajfill */ 1047 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1048 ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1049 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1050 ajnew = xi; 1051 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1052 ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1053 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1054 ajfill = xi; 1055 realloc++; /* count how many times we realloc */ 1056 } 1057 xi = ajnew + ainew[prow] + shift; 1058 flev = ajfill + ainew[prow] + shift; 1059 dloc[prow] = nzi; 1060 fm = fill[n]; 1061 while (nzf--) { 1062 *xi++ = fm - shift; 1063 *flev++ = im[fm]; 1064 fm = fill[fm]; 1065 } 1066 /* make sure row has diagonal entry */ 1067 if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 1068 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 1069 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1070 } 1071 } 1072 ierr = PetscFree(ajfill); CHKERRQ(ierr); 1073 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1074 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1075 ierr = PetscFree(fill);CHKERRQ(ierr); 1076 ierr = PetscFree(im);CHKERRQ(ierr); 1077 1078 { 1079 double af = ((double)ainew[n])/((double)ai[n]); 1080 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1081 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 1082 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 1083 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1084 if (diagonal_fill) { 1085 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount); 1086 } 1087 } 1088 1089 /* put together the new matrix */ 1090 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1091 PLogObjectParent(*fact,isicol); 1092 b = (Mat_SeqAIJ *) (*fact)->data; 1093 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1094 b->singlemalloc = PETSC_FALSE; 1095 len = (ainew[n] + shift)*sizeof(Scalar); 1096 /* the next line frees the default space generated by the Create() */ 1097 ierr = PetscFree(b->a);CHKERRQ(ierr); 1098 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1099 b->a = (Scalar *) PetscMalloc( len+1 );CHKPTRQ(b->a); 1100 b->j = ajnew; 1101 b->i = ainew; 1102 for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 1103 b->diag = dloc; 1104 b->ilen = 0; 1105 b->imax = 0; 1106 b->row = isrow; 1107 b->col = iscol; 1108 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1109 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1110 b->icol = isicol; 1111 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1112 /* In b structure: Free imax, ilen, old a, old j. 1113 Allocate dloc, solve_work, new a, new j */ 1114 PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 1115 b->maxnz = b->nz = ainew[n] + shift; 1116 (*fact)->factor = FACTOR_LU; 1117 1118 (*fact)->info.factor_mallocs = realloc; 1119 (*fact)->info.fill_ratio_given = f; 1120 (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]); 1121 (*fact)->factor = FACTOR_LU;; 1122 1123 PetscFunctionReturn(0); 1124 } 1125 1126 1127 1128 1129