1 /*$Id: aijfact.c,v 1.139 1999/12/18 00:44:57 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 7 #undef __FUNC__ 8 #define __FUNC__ "MatOrdering_Flow_SeqAIJ" 9 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 10 { 11 PetscFunctionBegin; 12 13 SETERRQ(PETSC_ERR_SUP,0,"Code not written"); 14 #if !defined(PETSC_USE_DEBUG) 15 PetscFunctionReturn(0); 16 #endif 17 } 18 19 20 extern int MatMarkDiagonal_SeqAIJ(Mat); 21 extern int Mat_AIJ_CheckInode(Mat); 22 23 extern int SPARSEKIT2dperm(int*,Scalar*,int*,int*,Scalar*,int*,int*,int*,int*,int*); 24 extern int SPARSEKIT2ilutp(int*,Scalar*,int*,int*,int*,double*,double*,int*,Scalar*,int*,int*,int*,Scalar*,int*,int*,int*); 25 extern int SPARSEKIT2msrcsr(int*,Scalar*,int*,Scalar*,int*,int*,Scalar*,int*); 26 27 #undef __FUNC__ 28 #define __FUNC__ "MatILUDTFactor_SeqAIJ" 29 /* ------------------------------------------------------------ 30 31 This interface was contribed by Tony Caola 32 33 This routine is an interface to the pivoting drop-tolerance 34 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 35 SPARSEKIT2. 36 37 The SPARSEKIT2 routines used here are covered by the GNU 38 copyright; see the file gnu in this directory. 39 40 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 41 help in getting this routine ironed out. 42 43 The major drawback to this routine is that if info->fill is 44 not large enough it fails rather than allocating more space; 45 this can be fixed by hacking/improving the f2c version of 46 Yousef Saad's code. 47 48 ishift = 0, for indices start at 1 49 ishift = 1, for indices starting at 0 50 ------------------------------------------------------------ 51 */ 52 53 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 54 { 55 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 56 IS iscolf, isicol, isirow; 57 PetscTruth reorder; 58 int *c,*r,*ic,ierr, i, n = a->m; 59 int *old_i = a->i, *old_j = a->j, *new_i, *old_i2, *old_j2,*new_j; 60 int *ordcol, *iwk,*iperm, *jw; 61 int ishift = !a->indexshift; 62 int jmax,lfill,job,*o_i,*o_j; 63 Scalar *old_a = a->a, *w, *new_a, *old_a2, *wk,*o_a; 64 double permtol,af; 65 66 PetscFunctionBegin; 67 68 if (info->dt == PETSC_DEFAULT) info->dt = .005; 69 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int) (1.5*a->rmax); 70 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 71 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*info->dtcount))/a->nz; 72 lfill = (int) (info->dtcount/2.0); 73 jmax = (int) (info->fill*a->nz); 74 permtol = info->dtcol; 75 76 77 /* ------------------------------------------------------------ 78 If reorder=.TRUE., then the original matrix has to be 79 reordered to reflect the user selected ordering scheme, and 80 then de-reordered so it is in it's original format. 81 Because Saad's dperm() is NOT in place, we have to copy 82 the original matrix and allocate more storage. . . 83 ------------------------------------------------------------ 84 */ 85 86 /* set reorder to true if either isrow or iscol is not identity */ 87 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 88 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 89 reorder = PetscNot(reorder); 90 91 92 /* storage for ilu factor */ 93 new_i = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(new_i); 94 new_j = (int *) PetscMalloc(jmax*sizeof(int)); CHKPTRQ(new_j); 95 new_a = (Scalar *) PetscMalloc(jmax*sizeof(Scalar)); CHKPTRQ(new_a); 96 97 ordcol = (int *) PetscMalloc(n*sizeof(int)); CHKPTRQ(ordcol); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 if (ishift) { 104 for (i=old_i[0];i<old_i[n];i++) { 105 old_j[i]++; 106 } 107 for(i=0;i<n+1;i++) { 108 old_i[i]++; 109 }; 110 }; 111 112 if (reorder) { 113 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 114 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 115 for(i=0;i<n;i++) { 116 r[i] = r[i]+1; 117 c[i] = c[i]+1; 118 } 119 old_i2 = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(old_i2); 120 old_j2 = (int *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int)); CHKPTRQ(old_j2); 121 old_a2 = (Scalar *) PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar));CHKPTRQ(old_a2); 122 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 123 for (i=0;i<n;i++) { 124 r[i] = r[i]-1; 125 c[i] = c[i]-1; 126 } 127 ierr = ISRestoreIndices(iscol,&c); CHKERRQ(ierr); 128 ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 iperm = (int *) PetscMalloc(2*n*sizeof(int)); CHKPTRQ(iperm); 144 jw = (int *) PetscMalloc(2*n*sizeof(int)); CHKPTRQ(jw); 145 w = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(w); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 148 if (ierr) { 149 switch (ierr) { 150 case -3: SETERRQ1(1,1,"ilutp(), matrix U overflows, need larger info->fill value %d",jmax); 151 case -2: SETERRQ1(1,1,"ilutp(), matrix L overflows, need larger info->fill value %d",jmax); 152 case -5: SETERRQ(1,1,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(1,1,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(1,1,"ilutp(), illegal info->fill value %d",jmax); 155 default: SETERRQ1(1,1,"ilutp(), zero pivot detected on row %d",ierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 wk = (Scalar *) PetscMalloc(n*sizeof(Scalar)); CHKPTRQ(wk); 169 iwk = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(iwk); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<=old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 if (ishift) { 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 } 196 197 /* Make the factored matrix 0-based */ 198 if (ishift) { 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 } 206 207 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 208 /*-- permute the right-hand-side and solution vectors --*/ 209 ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 210 ierr = ISInvertPermutation(isrow,&isirow); CHKERRQ(ierr); 211 ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 212 for(i=0; i<n; i++) { 213 ordcol[i] = ic[iperm[i]-1]; 214 }; 215 ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 216 ierr = ISDestroy(isicol);CHKERRQ(ierr); 217 218 ierr = PetscFree(iperm);CHKERRQ(ierr); 219 220 ierr = ISCreateGeneral(PETSC_COMM_SELF, n, ordcol, &iscolf); 221 ierr = PetscFree(ordcol);CHKERRQ(ierr); 222 223 /*----- put together the new matrix -----*/ 224 225 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact); CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ *) (*fact)->data; 230 ierr = PetscFree(b->imax);CHKERRQ(ierr); 231 b->sorted = PETSC_FALSE; 232 b->singlemalloc = PETSC_FALSE; 233 /* the next line frees the default space generated by the MatCreate() */ 234 ierr = PetscFree(b->a);CHKERRQ(ierr); 235 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 236 b->a = new_a; 237 b->j = new_j; 238 b->i = new_i; 239 b->ilen = 0; 240 b->imax = 0; 241 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 242 b->row = isirow; 243 b->col = iscolf; 244 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 245 b->maxnz = b->nz = new_i[n]; 246 b->indexshift = a->indexshift; 247 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 248 (*fact)->info.factor_mallocs = 0; 249 250 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 251 252 /* check out for identical nodes. If found, use inode functions */ 253 ierr = Mat_AIJ_CheckInode(*fact);CHKERRQ(ierr); 254 255 af = ((double)b->nz)/((double)a->nz) + .001; 256 PLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 257 PLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 258 PLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 259 PLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 260 261 PetscFunctionReturn(0); 262 } 263 264 /* 265 Factorization code for AIJ format. 266 */ 267 #undef __FUNC__ 268 #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ" 269 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B) 270 { 271 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 272 IS isicol; 273 int *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j; 274 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift; 275 int *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im; 276 277 PetscFunctionBegin; 278 PetscValidHeaderSpecific(isrow,IS_COOKIE); 279 PetscValidHeaderSpecific(iscol,IS_COOKIE); 280 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square"); 281 282 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 283 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 284 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 285 286 /* get new row pointers */ 287 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 288 ainew[0] = -shift; 289 /* don't know how many column pointers are needed so estimate */ 290 jmax = (int) (f*ai[n]+(!shift)); 291 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 292 /* fill is a linked list of nonzeros in active row */ 293 fill = (int *) PetscMalloc( (2*n+1)*sizeof(int));CHKPTRQ(fill); 294 im = fill + n + 1; 295 /* idnew is location of diagonal in factor */ 296 idnew = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(idnew); 297 idnew[0] = -shift; 298 299 for ( i=0; i<n; i++ ) { 300 /* first copy previous fill into linked list */ 301 nnz = nz = ai[r[i]+1] - ai[r[i]]; 302 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 303 ajtmp = aj + ai[r[i]] + shift; 304 fill[n] = n; 305 while (nz--) { 306 fm = n; 307 idx = ic[*ajtmp++ + shift]; 308 do { 309 m = fm; 310 fm = fill[m]; 311 } while (fm < idx); 312 fill[m] = idx; 313 fill[idx] = fm; 314 } 315 row = fill[n]; 316 while ( row < i ) { 317 ajtmp = ajnew + idnew[row] + (!shift); 318 nzbd = 1 + idnew[row] - ainew[row]; 319 nz = im[row] - nzbd; 320 fm = row; 321 while (nz-- > 0) { 322 idx = *ajtmp++ + shift; 323 nzbd++; 324 if (idx == i) im[row] = nzbd; 325 do { 326 m = fm; 327 fm = fill[m]; 328 } while (fm < idx); 329 if (fm != idx) { 330 fill[m] = idx; 331 fill[idx] = fm; 332 fm = idx; 333 nnz++; 334 } 335 } 336 row = fill[row]; 337 } 338 /* copy new filled row into permanent storage */ 339 ainew[i+1] = ainew[i] + nnz; 340 if (ainew[i+1] > jmax) { 341 342 /* estimate how much additional space we will need */ 343 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 344 /* just double the memory each time */ 345 int maxadd = jmax; 346 /* maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n); */ 347 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 348 jmax += maxadd; 349 350 /* allocate a longer ajnew */ 351 ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp); 352 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 353 ierr = PetscFree(ajnew);CHKERRQ(ierr); 354 ajnew = ajtmp; 355 realloc++; /* count how many times we realloc */ 356 } 357 ajtmp = ajnew + ainew[i] + shift; 358 fm = fill[n]; 359 nzi = 0; 360 im[i] = nnz; 361 while (nnz--) { 362 if (fm < i) nzi++; 363 *ajtmp++ = fm - shift; 364 fm = fill[fm]; 365 } 366 idnew[i] = ainew[i] + nzi; 367 } 368 if (ai[n] != 0) { 369 double af = ((double)ainew[n])/((double)ai[n]); 370 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 371 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 372 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 373 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 374 } else { 375 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 376 } 377 378 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 379 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 380 381 ierr = PetscFree(fill);CHKERRQ(ierr); 382 383 /* put together the new matrix */ 384 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 385 PLogObjectParent(*B,isicol); 386 b = (Mat_SeqAIJ *) (*B)->data; 387 ierr = PetscFree(b->imax);CHKERRQ(ierr); 388 b->singlemalloc = PETSC_FALSE; 389 /* the next line frees the default space generated by the Create() */ 390 ierr = PetscFree(b->a);CHKERRQ(ierr); 391 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 392 b->a = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a); 393 b->j = ajnew; 394 b->i = ainew; 395 b->diag = idnew; 396 b->ilen = 0; 397 b->imax = 0; 398 b->row = isrow; 399 b->col = iscol; 400 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 401 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 402 b->icol = isicol; 403 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 404 /* In b structure: Free imax, ilen, old a, old j. 405 Allocate idnew, solve_work, new a, new j */ 406 PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 407 b->maxnz = b->nz = ainew[n] + shift; 408 409 (*B)->factor = FACTOR_LU;; 410 (*B)->info.factor_mallocs = realloc; 411 (*B)->info.fill_ratio_given = f; 412 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant if A has inodes */ 413 414 if (ai[n] != 0) { 415 (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[n]); 416 } else { 417 (*B)->info.fill_ratio_needed = 0.0; 418 } 419 PetscFunctionReturn(0); 420 } 421 /* ----------------------------------------------------------- */ 422 extern int Mat_AIJ_CheckInode(Mat); 423 424 #undef __FUNC__ 425 #define __FUNC__ "MatLUFactorNumeric_SeqAIJ" 426 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 427 { 428 Mat C = *B; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data; 430 IS isrow = b->row, isicol = b->icol; 431 int *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j; 432 int *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift; 433 int *diag_offset = b->diag,diag,k; 434 int preserve_row_sums = (int) a->ilu_preserve_row_sums; 435 register int *pj; 436 Scalar *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0; 437 double ssum; 438 register Scalar *pv, *rtmps,*u_values; 439 440 PetscFunctionBegin; 441 442 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 443 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 444 rtmp = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) );CHKPTRQ(rtmp); 445 ierr = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr); 446 rtmps = rtmp + shift; ics = ic + shift; 447 448 /* precalculate row sums */ 449 if (preserve_row_sums) { 450 rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) );CHKPTRQ(rowsums); 451 for ( i=0; i<n; i++ ) { 452 nz = a->i[r[i]+1] - a->i[r[i]]; 453 v = a->a + a->i[r[i]] + shift; 454 sum = 0.0; 455 for ( j=0; j<nz; j++ ) sum += v[j]; 456 rowsums[i] = sum; 457 } 458 } 459 460 for ( i=0; i<n; i++ ) { 461 nz = ai[i+1] - ai[i]; 462 ajtmp = aj + ai[i] + shift; 463 for ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0; 464 465 /* load in initial (unfactored row) */ 466 nz = a->i[r[i]+1] - a->i[r[i]]; 467 ajtmpold = a->j + a->i[r[i]] + shift; 468 v = a->a + a->i[r[i]] + shift; 469 for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] = v[j]; 470 471 row = *ajtmp++ + shift; 472 while (row < i ) { 473 pc = rtmp + row; 474 if (*pc != 0.0) { 475 pv = b->a + diag_offset[row] + shift; 476 pj = b->j + diag_offset[row] + (!shift); 477 multiplier = *pc / *pv++; 478 *pc = multiplier; 479 nz = ai[row+1] - diag_offset[row] - 1; 480 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 481 PLogFlops(2*nz); 482 } 483 row = *ajtmp++ + shift; 484 } 485 /* finished row so stick it into b->a */ 486 pv = b->a + ai[i] + shift; 487 pj = b->j + ai[i] + shift; 488 nz = ai[i+1] - ai[i]; 489 for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];} 490 diag = diag_offset[i] - ai[i]; 491 /* 492 Possibly adjust diagonal entry on current row to force 493 LU matrix to have same row sum as initial matrix. 494 */ 495 if (pv[diag] == 0.0) { 496 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i); 497 } 498 if (preserve_row_sums) { 499 pj = b->j + ai[i] + shift; 500 sum = rowsums[i]; 501 for ( j=0; j<diag; j++ ) { 502 u_values = b->a + diag_offset[pj[j]] + shift; 503 nz = ai[pj[j]+1] - diag_offset[pj[j]]; 504 inner_sum = 0.0; 505 for ( k=0; k<nz; k++ ) { 506 inner_sum += u_values[k]; 507 } 508 sum -= pv[j]*inner_sum; 509 510 } 511 nz = ai[i+1] - diag_offset[i] - 1; 512 u_values = b->a + diag_offset[i] + 1 + shift; 513 for ( k=0; k<nz; k++ ) { 514 sum -= u_values[k]; 515 } 516 ssum = PetscAbsScalar(sum/pv[diag]); 517 if (ssum < 1000. && ssum > .001) pv[diag] = sum; 518 } 519 /* check pivot entry for current row */ 520 } 521 522 /* invert diagonal entries for simplier triangular solves */ 523 for ( i=0; i<n; i++ ) { 524 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 525 } 526 527 if (preserve_row_sums) {ierr = PetscFree(rowsums);CHKERRQ(ierr);} 528 ierr = PetscFree(rtmp);CHKERRQ(ierr); 529 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 530 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 531 C->factor = FACTOR_LU; 532 ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr); 533 C->assembled = PETSC_TRUE; 534 PLogFlops(b->n); 535 PetscFunctionReturn(0); 536 } 537 /* ----------------------------------------------------------- */ 538 #undef __FUNC__ 539 #define __FUNC__ "MatLUFactor_SeqAIJ" 540 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f) 541 { 542 Mat_SeqAIJ *mat = (Mat_SeqAIJ *) A->data; 543 int ierr,refct; 544 Mat C; 545 PetscOps *Abops; 546 MatOps Aops; 547 548 PetscFunctionBegin; 549 ierr = MatLUFactorSymbolic(A,row,col,f,&C);CHKERRQ(ierr); 550 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 551 552 /* free all the data structures from mat */ 553 ierr = PetscFree(mat->a);CHKERRQ(ierr); 554 if (!mat->singlemalloc) { 555 ierr = PetscFree(mat->i);CHKERRQ(ierr); 556 ierr = PetscFree(mat->j);CHKERRQ(ierr); 557 } 558 if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);} 559 if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);} 560 if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);} 561 if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);} 562 if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);} 563 if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);} 564 ierr = PetscFree(mat);CHKERRQ(ierr); 565 566 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 567 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 568 569 /* 570 This is horrible, horrible code. We need to keep the 571 A pointers for the bops and ops but copy everything 572 else from C. 573 */ 574 Abops = A->bops; 575 Aops = A->ops; 576 refct = A->refct; 577 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 578 mat = (Mat_SeqAIJ *) A->data; 579 PLogObjectParent(A,mat->icol); 580 581 A->bops = Abops; 582 A->ops = Aops; 583 A->qlist = 0; 584 A->refct = refct; 585 /* copy over the type_name and name */ 586 ierr = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr); 587 ierr = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr); 588 589 PetscHeaderDestroy(C); 590 591 PetscFunctionReturn(0); 592 } 593 /* ----------------------------------------------------------- */ 594 #undef __FUNC__ 595 #define __FUNC__ "MatSolve_SeqAIJ" 596 int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx) 597 { 598 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 599 IS iscol = a->col, isrow = a->row; 600 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 601 int nz,shift = a->indexshift,*rout,*cout; 602 Scalar *x,*b,*tmp, *tmps, *aa = a->a, sum, *v; 603 604 PetscFunctionBegin; 605 if (!n) PetscFunctionReturn(0); 606 607 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 608 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 609 tmp = a->solve_work; 610 611 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 612 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 613 614 /* forward solve the lower triangular */ 615 tmp[0] = b[*r++]; 616 tmps = tmp + shift; 617 for ( i=1; i<n; i++ ) { 618 v = aa + ai[i] + shift; 619 vi = aj + ai[i] + shift; 620 nz = a->diag[i] - ai[i]; 621 sum = b[*r++]; 622 while (nz--) sum -= *v++ * tmps[*vi++]; 623 tmp[i] = sum; 624 } 625 626 /* backward solve the upper triangular */ 627 for ( i=n-1; i>=0; i-- ){ 628 v = aa + a->diag[i] + (!shift); 629 vi = aj + a->diag[i] + (!shift); 630 nz = ai[i+1] - a->diag[i] - 1; 631 sum = tmp[i]; 632 while (nz--) sum -= *v++ * tmps[*vi++]; 633 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 634 } 635 636 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 637 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 638 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 639 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 640 PLogFlops(2*a->nz - a->n); 641 PetscFunctionReturn(0); 642 } 643 644 /* ----------------------------------------------------------- */ 645 #undef __FUNC__ 646 #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering" 647 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb, Vec xx) 648 { 649 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 650 int n = a->m, *ai = a->i, *aj = a->j, *adiag = a->diag,ierr; 651 Scalar *x,*b, *aa = a->a, sum; 652 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 653 int adiag_i,i,*vi,nz,ai_i; 654 Scalar *v; 655 #endif 656 657 PetscFunctionBegin; 658 if (!n) PetscFunctionReturn(0); 659 if (a->indexshift) { 660 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 661 PetscFunctionReturn(0); 662 } 663 664 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 665 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 666 667 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 668 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 669 #else 670 /* forward solve the lower triangular */ 671 x[0] = b[0]; 672 for ( i=1; i<n; i++ ) { 673 ai_i = ai[i]; 674 v = aa + ai_i; 675 vi = aj + ai_i; 676 nz = adiag[i] - ai_i; 677 sum = b[i]; 678 while (nz--) sum -= *v++ * x[*vi++]; 679 x[i] = sum; 680 } 681 682 /* backward solve the upper triangular */ 683 for ( i=n-1; i>=0; i-- ){ 684 adiag_i = adiag[i]; 685 v = aa + adiag_i + 1; 686 vi = aj + adiag_i + 1; 687 nz = ai[i+1] - adiag_i - 1; 688 sum = x[i]; 689 while (nz--) sum -= *v++ * x[*vi++]; 690 x[i] = sum*aa[adiag_i]; 691 } 692 #endif 693 PLogFlops(2*a->nz - a->n); 694 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 695 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNC__ 700 #define __FUNC__ "MatSolveAdd_SeqAIJ" 701 int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx) 702 { 703 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 704 IS iscol = a->col, isrow = a->row; 705 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 706 int nz, shift = a->indexshift,*rout,*cout; 707 Scalar *x,*b,*tmp, *aa = a->a, sum, *v; 708 709 PetscFunctionBegin; 710 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 711 712 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 713 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 714 tmp = a->solve_work; 715 716 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 717 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 718 719 /* forward solve the lower triangular */ 720 tmp[0] = b[*r++]; 721 for ( i=1; i<n; i++ ) { 722 v = aa + ai[i] + shift; 723 vi = aj + ai[i] + shift; 724 nz = a->diag[i] - ai[i]; 725 sum = b[*r++]; 726 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 727 tmp[i] = sum; 728 } 729 730 /* backward solve the upper triangular */ 731 for ( i=n-1; i>=0; i-- ){ 732 v = aa + a->diag[i] + (!shift); 733 vi = aj + a->diag[i] + (!shift); 734 nz = ai[i+1] - a->diag[i] - 1; 735 sum = tmp[i]; 736 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 737 tmp[i] = sum*aa[a->diag[i]+shift]; 738 x[*c--] += tmp[i]; 739 } 740 741 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 742 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 743 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 744 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 745 PLogFlops(2*a->nz); 746 747 PetscFunctionReturn(0); 748 } 749 /* -------------------------------------------------------------------*/ 750 #undef __FUNC__ 751 #define __FUNC__ "MatSolveTranspose_SeqAIJ" 752 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb, Vec xx) 753 { 754 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 755 IS iscol = a->col, isrow = a->row; 756 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 757 int nz,shift = a->indexshift,*rout,*cout, *diag = a->diag; 758 Scalar *x,*b,*tmp, *aa = a->a, *v, s1; 759 760 PetscFunctionBegin; 761 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 762 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 763 tmp = a->solve_work; 764 765 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 766 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 767 768 /* copy the b into temp work space according to permutation */ 769 for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 770 771 /* forward solve the U^T */ 772 for ( i=0; i<n; i++ ) { 773 v = aa + diag[i] + shift; 774 vi = aj + diag[i] + (!shift); 775 nz = ai[i+1] - diag[i] - 1; 776 s1 = tmp[i]; 777 s1 *= *(v++); /* multiply by inverse of diagonal entry */ 778 while (nz--) { 779 tmp[*vi++ + shift] -= (*v++)*s1; 780 } 781 tmp[i] = s1; 782 } 783 784 /* backward solve the L^T */ 785 for ( i=n-1; i>=0; i-- ){ 786 v = aa + diag[i] - 1 + shift; 787 vi = aj + diag[i] - 1 + shift; 788 nz = diag[i] - ai[i]; 789 s1 = tmp[i]; 790 while (nz--) { 791 tmp[*vi-- + shift] -= (*v--)*s1; 792 } 793 } 794 795 /* copy tmp into x according to permutation */ 796 for ( i=0; i<n; i++ ) x[r[i]] = tmp[i]; 797 798 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 799 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 800 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 801 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 802 803 PLogFlops(2*a->nz-a->n); 804 PetscFunctionReturn(0); 805 } 806 807 #undef __FUNC__ 808 #define __FUNC__ "MatSolveTransposeAdd_SeqAIJ" 809 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx) 810 { 811 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 812 IS iscol = a->col, isrow = a->row; 813 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 814 int nz,shift = a->indexshift, *rout, *cout, *diag = a->diag; 815 Scalar *x,*b,*tmp, *aa = a->a, *v; 816 817 PetscFunctionBegin; 818 if (zz != xx) VecCopy(zz,xx); 819 820 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 821 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 822 tmp = a->solve_work; 823 824 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 825 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 826 827 /* copy the b into temp work space according to permutation */ 828 for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 829 830 /* forward solve the U^T */ 831 for ( i=0; i<n; i++ ) { 832 v = aa + diag[i] + shift; 833 vi = aj + diag[i] + (!shift); 834 nz = ai[i+1] - diag[i] - 1; 835 tmp[i] *= *v++; 836 while (nz--) { 837 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 838 } 839 } 840 841 /* backward solve the L^T */ 842 for ( i=n-1; i>=0; i-- ){ 843 v = aa + diag[i] - 1 + shift; 844 vi = aj + diag[i] - 1 + shift; 845 nz = diag[i] - ai[i]; 846 while (nz--) { 847 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 848 } 849 } 850 851 /* copy tmp into x according to permutation */ 852 for ( i=0; i<n; i++ ) x[r[i]] += tmp[i]; 853 854 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 855 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 856 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 857 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 858 859 PLogFlops(2*a->nz); 860 PetscFunctionReturn(0); 861 } 862 /* ----------------------------------------------------------------*/ 863 extern int MatMissingDiagonal_SeqAIJ(Mat); 864 865 #undef __FUNC__ 866 #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ" 867 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 868 { 869 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 870 IS isicol; 871 int *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j; 872 int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 873 int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0, dcount = 0; 874 int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 875 PetscTruth col_identity, row_identity; 876 double f; 877 878 PetscFunctionBegin; 879 if (info) { 880 f = info->fill; 881 levels = (int) info->levels; 882 diagonal_fill = (int) info->diagonal_fill; 883 } else { 884 f = 1.0; 885 levels = 0; 886 diagonal_fill = 0; 887 } 888 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 889 890 /* special case that simply copies fill pattern */ 891 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 892 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 893 if (!levels && row_identity && col_identity) { 894 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 895 (*fact)->factor = FACTOR_LU; 896 b = (Mat_SeqAIJ *) (*fact)->data; 897 if (!b->diag) { 898 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 899 } 900 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 901 b->row = isrow; 902 b->col = iscol; 903 b->icol = isicol; 904 b->solve_work = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 905 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 906 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 907 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 912 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 913 914 /* get new row pointers */ 915 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 916 ainew[0] = -shift; 917 /* don't know how many column pointers are needed so estimate */ 918 jmax = (int) (f*(ai[n]+!shift)); 919 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 920 /* ajfill is level of fill for each fill entry */ 921 ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill); 922 /* fill is a linked list of nonzeros in active row */ 923 fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill); 924 /* im is level for each filled value */ 925 im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im); 926 /* dloc is location of diagonal in factor */ 927 dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc); 928 dloc[0] = 0; 929 for ( prow=0; prow<n; prow++ ) { 930 931 /* copy current row into linked list */ 932 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 933 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 934 xi = aj + ai[r[prow]] + shift; 935 fill[n] = n; 936 fill[prow] = -1; /* marker to indicate if diagonal exists */ 937 while (nz--) { 938 fm = n; 939 idx = ic[*xi++ + shift]; 940 do { 941 m = fm; 942 fm = fill[m]; 943 } while (fm < idx); 944 fill[m] = idx; 945 fill[idx] = fm; 946 im[idx] = 0; 947 } 948 949 /* make sure diagonal entry is included */ 950 if (diagonal_fill && fill[prow] == -1) { 951 fm = n; 952 while (fill[fm] < prow) fm = fill[fm]; 953 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 954 fill[fm] = prow; 955 im[prow] = 0; 956 nzf++; 957 dcount++; 958 } 959 960 nzi = 0; 961 row = fill[n]; 962 while ( row < prow ) { 963 incrlev = im[row] + 1; 964 nz = dloc[row]; 965 xi = ajnew + ainew[row] + shift + nz + 1; 966 flev = ajfill + ainew[row] + shift + nz + 1; 967 nnz = ainew[row+1] - ainew[row] - nz - 1; 968 fm = row; 969 while (nnz-- > 0) { 970 idx = *xi++ + shift; 971 if (*flev + incrlev > levels) { 972 flev++; 973 continue; 974 } 975 do { 976 m = fm; 977 fm = fill[m]; 978 } while (fm < idx); 979 if (fm != idx) { 980 im[idx] = *flev + incrlev; 981 fill[m] = idx; 982 fill[idx] = fm; 983 fm = idx; 984 nzf++; 985 } else { 986 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 987 } 988 flev++; 989 } 990 row = fill[row]; 991 nzi++; 992 } 993 /* copy new filled row into permanent storage */ 994 ainew[prow+1] = ainew[prow] + nzf; 995 if (ainew[prow+1] > jmax-shift) { 996 997 /* estimate how much additional space we will need */ 998 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 999 /* just double the memory each time */ 1000 /* maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1001 int maxadd = jmax; 1002 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1003 jmax += maxadd; 1004 1005 /* allocate a longer ajnew and ajfill */ 1006 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1007 ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1008 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1009 ajnew = xi; 1010 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 1011 ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1012 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1013 ajfill = xi; 1014 realloc++; /* count how many times we realloc */ 1015 } 1016 xi = ajnew + ainew[prow] + shift; 1017 flev = ajfill + ainew[prow] + shift; 1018 dloc[prow] = nzi; 1019 fm = fill[n]; 1020 while (nzf--) { 1021 *xi++ = fm - shift; 1022 *flev++ = im[fm]; 1023 fm = fill[fm]; 1024 } 1025 /* make sure row has diagonal entry */ 1026 if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 1027 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 1028 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1029 } 1030 } 1031 ierr = PetscFree(ajfill); CHKERRQ(ierr); 1032 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1033 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1034 ierr = PetscFree(fill);CHKERRQ(ierr); 1035 ierr = PetscFree(im);CHKERRQ(ierr); 1036 1037 { 1038 double af = ((double)ainew[n])/((double)ai[n]); 1039 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1040 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 1041 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 1042 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1043 if (diagonal_fill) { 1044 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount); 1045 } 1046 } 1047 1048 /* put together the new matrix */ 1049 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1050 PLogObjectParent(*fact,isicol); 1051 b = (Mat_SeqAIJ *) (*fact)->data; 1052 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1053 b->singlemalloc = PETSC_FALSE; 1054 len = (ainew[n] + shift)*sizeof(Scalar); 1055 /* the next line frees the default space generated by the Create() */ 1056 ierr = PetscFree(b->a);CHKERRQ(ierr); 1057 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1058 b->a = (Scalar *) PetscMalloc( len+1 );CHKPTRQ(b->a); 1059 b->j = ajnew; 1060 b->i = ainew; 1061 for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 1062 b->diag = dloc; 1063 b->ilen = 0; 1064 b->imax = 0; 1065 b->row = isrow; 1066 b->col = iscol; 1067 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1068 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1069 b->icol = isicol; 1070 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1071 /* In b structure: Free imax, ilen, old a, old j. 1072 Allocate dloc, solve_work, new a, new j */ 1073 PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 1074 b->maxnz = b->nz = ainew[n] + shift; 1075 (*fact)->factor = FACTOR_LU; 1076 1077 (*fact)->info.factor_mallocs = realloc; 1078 (*fact)->info.fill_ratio_given = f; 1079 (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]); 1080 (*fact)->factor = FACTOR_LU;; 1081 1082 PetscFunctionReturn(0); 1083 } 1084 1085 1086 1087 1088