1 /*$Id: aijfact.c,v 1.144 2000/02/02 20:08:56 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 7 #undef __FUNC__ 8 #define __FUNC__ "MatOrdering_Flow_SeqAIJ" 9 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 10 { 11 PetscFunctionBegin; 12 13 SETERRQ(PETSC_ERR_SUP,0,"Code not written"); 14 #if !defined(PETSC_USE_DEBUG) 15 PetscFunctionReturn(0); 16 #endif 17 } 18 19 20 extern int MatMarkDiagonal_SeqAIJ(Mat); 21 extern int Mat_AIJ_CheckInode(Mat); 22 23 extern int SPARSEKIT2dperm(int*,Scalar*,int*,int*,Scalar*,int*,int*,int*,int*,int*); 24 extern int SPARSEKIT2ilutp(int*,Scalar*,int*,int*,int*,PetscReal*,PetscReal*,int*,Scalar*,int*,int*,int*,Scalar*,int*,int*,int*); 25 extern int SPARSEKIT2msrcsr(int*,Scalar*,int*,Scalar*,int*,int*,Scalar*,int*); 26 27 #undef __FUNC__ 28 #define __FUNC__ "MatILUDTFactor_SeqAIJ" 29 /* ------------------------------------------------------------ 30 31 This interface was contribed by Tony Caola 32 33 This routine is an interface to the pivoting drop-tolerance 34 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 35 SPARSEKIT2. 36 37 The SPARSEKIT2 routines used here are covered by the GNU 38 copyright; see the file gnu in this directory. 39 40 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 41 help in getting this routine ironed out. 42 43 The major drawback to this routine is that if info->fill is 44 not large enough it fails rather than allocating more space; 45 this can be fixed by hacking/improving the f2c version of 46 Yousef Saad's code. 47 48 ishift = 0, for indices start at 1 49 ishift = 1, for indices starting at 0 50 ------------------------------------------------------------ 51 */ 52 53 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 54 { 55 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 56 IS iscolf,isicol,isirow; 57 PetscTruth reorder; 58 int *c,*r,*ic,ierr,i,n = a->m; 59 int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 60 int *ordcol,*iwk,*iperm,*jw; 61 int ishift = !a->indexshift; 62 int jmax,lfill,job,*o_i,*o_j; 63 Scalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 64 PetscReal permtol,af; 65 66 PetscFunctionBegin; 67 68 if (info->dt == PETSC_DEFAULT) info->dt = .005; 69 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax); 70 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 71 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*info->dtcount))/a->nz; 72 lfill = (int)(info->dtcount/2.0); 73 jmax = (int)(info->fill*a->nz); 74 permtol = info->dtcol; 75 76 77 /* ------------------------------------------------------------ 78 If reorder=.TRUE., then the original matrix has to be 79 reordered to reflect the user selected ordering scheme, and 80 then de-reordered so it is in it's original format. 81 Because Saad's dperm() is NOT in place, we have to copy 82 the original matrix and allocate more storage. . . 83 ------------------------------------------------------------ 84 */ 85 86 /* set reorder to true if either isrow or iscol is not identity */ 87 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 88 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 89 reorder = PetscNot(reorder); 90 91 92 /* storage for ilu factor */ 93 new_i = (int*) PetscMalloc((n+1)*sizeof(int));CHKPTRQ(new_i); 94 new_j = (int*) PetscMalloc(jmax*sizeof(int));CHKPTRQ(new_j); 95 new_a = (Scalar*)PetscMalloc(jmax*sizeof(Scalar));CHKPTRQ(new_a); 96 97 ordcol = (int*)PetscMalloc(n*sizeof(int));CHKPTRQ(ordcol); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 if (ishift) { 104 for (i=old_i[0];i<old_i[n];i++) { 105 old_j[i]++; 106 } 107 for(i=0;i<n+1;i++) { 108 old_i[i]++; 109 }; 110 }; 111 112 if (reorder) { 113 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 114 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 115 for(i=0;i<n;i++) { 116 r[i] = r[i]+1; 117 c[i] = c[i]+1; 118 } 119 old_i2 = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(old_i2); 120 old_j2 = (int*)PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int));CHKPTRQ(old_j2); 121 old_a2 = (Scalar*)PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar));CHKPTRQ(old_a2); 122 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 123 for (i=0;i<n;i++) { 124 r[i] = r[i]-1; 125 c[i] = c[i]-1; 126 } 127 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 128 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 iperm = (int*) PetscMalloc(2*n*sizeof(int));CHKPTRQ(iperm); 144 jw = (int*) PetscMalloc(2*n*sizeof(int));CHKPTRQ(jw); 145 w = (Scalar*)PetscMalloc(n*sizeof(Scalar));CHKPTRQ(w); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 148 if (ierr) { 149 switch (ierr) { 150 case -3: SETERRQ2(1,1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 151 case -2: SETERRQ2(1,1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 152 case -5: SETERRQ(1,1,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(1,1,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(1,1,"ilutp(), illegal info->fill value %d",jmax); 155 default: SETERRQ1(1,1,"ilutp(), zero pivot detected on row %d",ierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 wk = (Scalar*)PetscMalloc(n*sizeof(Scalar));CHKPTRQ(wk); 169 iwk = (int*) PetscMalloc((n+1)*sizeof(int));CHKPTRQ(iwk); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 if (ishift) { 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 } 196 197 /* Make the factored matrix 0-based */ 198 if (ishift) { 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 } 206 207 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 208 /*-- permute the right-hand-side and solution vectors --*/ 209 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 210 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 211 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 212 for(i=0; i<n; i++) { 213 ordcol[i] = ic[iperm[i]-1]; 214 }; 215 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 216 ierr = ISDestroy(isicol);CHKERRQ(ierr); 217 218 ierr = PetscFree(iperm);CHKERRQ(ierr); 219 220 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 221 ierr = PetscFree(ordcol);CHKERRQ(ierr); 222 223 /*----- put together the new matrix -----*/ 224 225 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 ierr = PetscFree(b->imax);CHKERRQ(ierr); 231 b->sorted = PETSC_FALSE; 232 b->singlemalloc = PETSC_FALSE; 233 /* the next line frees the default space generated by the MatCreate() */ 234 ierr = PetscFree(b->a);CHKERRQ(ierr); 235 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 236 b->a = new_a; 237 b->j = new_j; 238 b->i = new_i; 239 b->ilen = 0; 240 b->imax = 0; 241 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 242 b->row = isirow; 243 b->col = iscolf; 244 b->solve_work = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 245 b->maxnz = b->nz = new_i[n]; 246 b->indexshift = a->indexshift; 247 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 248 (*fact)->info.factor_mallocs = 0; 249 250 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 251 252 /* check out for identical nodes. If found, use inode functions */ 253 ierr = Mat_AIJ_CheckInode(*fact);CHKERRQ(ierr); 254 255 af = ((double)b->nz)/((double)a->nz) + .001; 256 PLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 257 PLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 258 PLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 259 PLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 260 261 PetscFunctionReturn(0); 262 } 263 264 /* 265 Factorization code for AIJ format. 266 */ 267 #undef __FUNC__ 268 #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ" 269 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,PetscReal f,Mat *B) 270 { 271 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 272 IS isicol; 273 int *r,*ic,ierr,i,n = a->m,*ai = a->i,*aj = a->j; 274 int *ainew,*ajnew,jmax,*fill,*ajtmp,nz,shift = a->indexshift; 275 int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 276 277 PetscFunctionBegin; 278 PetscValidHeaderSpecific(isrow,IS_COOKIE); 279 PetscValidHeaderSpecific(iscol,IS_COOKIE); 280 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square"); 281 282 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 283 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 284 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 285 286 /* get new row pointers */ 287 ainew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(ainew); 288 ainew[0] = -shift; 289 /* don't know how many column pointers are needed so estimate */ 290 jmax = (int)(f*ai[n]+(!shift)); 291 ajnew = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajnew); 292 /* fill is a linked list of nonzeros in active row */ 293 fill = (int*)PetscMalloc((2*n+1)*sizeof(int));CHKPTRQ(fill); 294 im = fill + n + 1; 295 /* idnew is location of diagonal in factor */ 296 idnew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(idnew); 297 idnew[0] = -shift; 298 299 for (i=0; i<n; i++) { 300 /* first copy previous fill into linked list */ 301 nnz = nz = ai[r[i]+1] - ai[r[i]]; 302 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 303 ajtmp = aj + ai[r[i]] + shift; 304 fill[n] = n; 305 while (nz--) { 306 fm = n; 307 idx = ic[*ajtmp++ + shift]; 308 do { 309 m = fm; 310 fm = fill[m]; 311 } while (fm < idx); 312 fill[m] = idx; 313 fill[idx] = fm; 314 } 315 row = fill[n]; 316 while (row < i) { 317 ajtmp = ajnew + idnew[row] + (!shift); 318 nzbd = 1 + idnew[row] - ainew[row]; 319 nz = im[row] - nzbd; 320 fm = row; 321 while (nz-- > 0) { 322 idx = *ajtmp++ + shift; 323 nzbd++; 324 if (idx == i) im[row] = nzbd; 325 do { 326 m = fm; 327 fm = fill[m]; 328 } while (fm < idx); 329 if (fm != idx) { 330 fill[m] = idx; 331 fill[idx] = fm; 332 fm = idx; 333 nnz++; 334 } 335 } 336 row = fill[row]; 337 } 338 /* copy new filled row into permanent storage */ 339 ainew[i+1] = ainew[i] + nnz; 340 if (ainew[i+1] > jmax) { 341 342 /* estimate how much additional space we will need */ 343 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 344 /* just double the memory each time */ 345 int maxadd = jmax; 346 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 347 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 348 jmax += maxadd; 349 350 /* allocate a longer ajnew */ 351 ajtmp = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(ajtmp); 352 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 353 ierr = PetscFree(ajnew);CHKERRQ(ierr); 354 ajnew = ajtmp; 355 realloc++; /* count how many times we realloc */ 356 } 357 ajtmp = ajnew + ainew[i] + shift; 358 fm = fill[n]; 359 nzi = 0; 360 im[i] = nnz; 361 while (nnz--) { 362 if (fm < i) nzi++; 363 *ajtmp++ = fm - shift; 364 fm = fill[fm]; 365 } 366 idnew[i] = ainew[i] + nzi; 367 } 368 if (ai[n] != 0) { 369 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 370 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 371 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 372 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 373 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 374 } else { 375 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 376 } 377 378 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 379 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 380 381 ierr = PetscFree(fill);CHKERRQ(ierr); 382 383 /* put together the new matrix */ 384 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 385 PLogObjectParent(*B,isicol); 386 b = (Mat_SeqAIJ*)(*B)->data; 387 ierr = PetscFree(b->imax);CHKERRQ(ierr); 388 b->singlemalloc = PETSC_FALSE; 389 /* the next line frees the default space generated by the Create() */ 390 ierr = PetscFree(b->a);CHKERRQ(ierr); 391 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 392 b->a = (Scalar*)PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a); 393 b->j = ajnew; 394 b->i = ainew; 395 b->diag = idnew; 396 b->ilen = 0; 397 b->imax = 0; 398 b->row = isrow; 399 b->col = iscol; 400 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 401 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 402 b->icol = isicol; 403 b->solve_work = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 404 /* In b structure: Free imax, ilen, old a, old j. 405 Allocate idnew, solve_work, new a, new j */ 406 PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 407 b->maxnz = b->nz = ainew[n] + shift; 408 409 (*B)->factor = FACTOR_LU; 410 (*B)->info.factor_mallocs = realloc; 411 (*B)->info.fill_ratio_given = f; 412 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 413 414 if (ai[n] != 0) { 415 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 416 } else { 417 (*B)->info.fill_ratio_needed = 0.0; 418 } 419 PetscFunctionReturn(0); 420 } 421 /* ----------------------------------------------------------- */ 422 extern int Mat_AIJ_CheckInode(Mat); 423 424 #undef __FUNC__ 425 #define __FUNC__ "MatLUFactorNumeric_SeqAIJ" 426 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 427 { 428 Mat C = *B; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 430 IS isrow = b->row,isicol = b->icol; 431 int *r,*ic,ierr,i,j,n = a->m,*ai = b->i,*aj = b->j; 432 int *ajtmpold,*ajtmp,nz,row,*ics,shift = a->indexshift; 433 int *diag_offset = b->diag,diag,k; 434 int preserve_row_sums = (int)a->ilu_preserve_row_sums,*pj; 435 Scalar *rtmp,*v,*pc,multiplier,sum,inner_sum,*rowsums = 0,*pv,*rtmps,*u_values; 436 PetscReal ssum; 437 438 PetscFunctionBegin; 439 440 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 441 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 442 rtmp = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(rtmp); 443 ierr = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr); 444 rtmps = rtmp + shift; ics = ic + shift; 445 446 /* precalculate row sums */ 447 if (preserve_row_sums) { 448 rowsums = (Scalar*)PetscMalloc(n*sizeof(Scalar));CHKPTRQ(rowsums); 449 for (i=0; i<n; i++) { 450 nz = a->i[r[i]+1] - a->i[r[i]]; 451 v = a->a + a->i[r[i]] + shift; 452 sum = 0.0; 453 for (j=0; j<nz; j++) sum += v[j]; 454 rowsums[i] = sum; 455 } 456 } 457 458 for (i=0; i<n; i++) { 459 nz = ai[i+1] - ai[i]; 460 ajtmp = aj + ai[i] + shift; 461 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 462 463 /* load in initial (unfactored row) */ 464 nz = a->i[r[i]+1] - a->i[r[i]]; 465 ajtmpold = a->j + a->i[r[i]] + shift; 466 v = a->a + a->i[r[i]] + shift; 467 for (j=0; j<nz; j++) rtmp[ics[ajtmpold[j]]] = v[j]; 468 469 row = *ajtmp++ + shift; 470 while (row < i) { 471 pc = rtmp + row; 472 if (*pc != 0.0) { 473 pv = b->a + diag_offset[row] + shift; 474 pj = b->j + diag_offset[row] + (!shift); 475 multiplier = *pc / *pv++; 476 *pc = multiplier; 477 nz = ai[row+1] - diag_offset[row] - 1; 478 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 479 PLogFlops(2*nz); 480 } 481 row = *ajtmp++ + shift; 482 } 483 /* finished row so stick it into b->a */ 484 pv = b->a + ai[i] + shift; 485 pj = b->j + ai[i] + shift; 486 nz = ai[i+1] - ai[i]; 487 for (j=0; j<nz; j++) {pv[j] = rtmps[pj[j]];} 488 diag = diag_offset[i] - ai[i]; 489 /* 490 Possibly adjust diagonal entry on current row to force 491 LU matrix to have same row sum as initial matrix. 492 */ 493 if (pv[diag] == 0.0) { 494 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i); 495 } 496 if (preserve_row_sums) { 497 pj = b->j + ai[i] + shift; 498 sum = rowsums[i]; 499 for (j=0; j<diag; j++) { 500 u_values = b->a + diag_offset[pj[j]] + shift; 501 nz = ai[pj[j]+1] - diag_offset[pj[j]]; 502 inner_sum = 0.0; 503 for (k=0; k<nz; k++) { 504 inner_sum += u_values[k]; 505 } 506 sum -= pv[j]*inner_sum; 507 508 } 509 nz = ai[i+1] - diag_offset[i] - 1; 510 u_values = b->a + diag_offset[i] + 1 + shift; 511 for (k=0; k<nz; k++) { 512 sum -= u_values[k]; 513 } 514 ssum = PetscAbsScalar(sum/pv[diag]); 515 if (ssum < 1000. && ssum > .001) pv[diag] = sum; 516 } 517 /* check pivot entry for current row */ 518 } 519 520 /* invert diagonal entries for simplier triangular solves */ 521 for (i=0; i<n; i++) { 522 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 523 } 524 525 if (preserve_row_sums) {ierr = PetscFree(rowsums);CHKERRQ(ierr);} 526 ierr = PetscFree(rtmp);CHKERRQ(ierr); 527 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 528 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 529 C->factor = FACTOR_LU; 530 ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr); 531 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 532 C->assembled = PETSC_TRUE; 533 PLogFlops(b->n); 534 PetscFunctionReturn(0); 535 } 536 /* ----------------------------------------------------------- */ 537 #undef __FUNC__ 538 #define __FUNC__ "MatLUFactor_SeqAIJ" 539 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,PetscReal f) 540 { 541 Mat_SeqAIJ *mat = (Mat_SeqAIJ*)A->data; 542 int ierr,refct; 543 Mat C; 544 PetscOps *Abops; 545 MatOps Aops; 546 547 PetscFunctionBegin; 548 ierr = MatLUFactorSymbolic(A,row,col,f,&C);CHKERRQ(ierr); 549 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 550 551 /* free all the data structures from mat */ 552 ierr = PetscFree(mat->a);CHKERRQ(ierr); 553 if (!mat->singlemalloc) { 554 ierr = PetscFree(mat->i);CHKERRQ(ierr); 555 ierr = PetscFree(mat->j);CHKERRQ(ierr); 556 } 557 if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);} 558 if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);} 559 if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);} 560 if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);} 561 if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);} 562 if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);} 563 ierr = PetscFree(mat);CHKERRQ(ierr); 564 565 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 566 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 567 568 /* 569 This is horrible, horrible code. We need to keep the 570 A pointers for the bops and ops but copy everything 571 else from C. 572 */ 573 Abops = A->bops; 574 Aops = A->ops; 575 refct = A->refct; 576 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 577 mat = (Mat_SeqAIJ*)A->data; 578 PLogObjectParent(A,mat->icol); 579 580 A->bops = Abops; 581 A->ops = Aops; 582 A->qlist = 0; 583 A->refct = refct; 584 /* copy over the type_name and name */ 585 ierr = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr); 586 ierr = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr); 587 588 PetscHeaderDestroy(C); 589 590 PetscFunctionReturn(0); 591 } 592 /* ----------------------------------------------------------- */ 593 #undef __FUNC__ 594 #define __FUNC__ "MatSolve_SeqAIJ" 595 int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 596 { 597 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 598 IS iscol = a->col,isrow = a->row; 599 int *r,*c,ierr,i, n = a->m,*vi,*ai = a->i,*aj = a->j; 600 int nz,shift = a->indexshift,*rout,*cout; 601 Scalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 602 603 PetscFunctionBegin; 604 if (!n) PetscFunctionReturn(0); 605 606 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 607 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 608 tmp = a->solve_work; 609 610 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 611 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 612 613 /* forward solve the lower triangular */ 614 tmp[0] = b[*r++]; 615 tmps = tmp + shift; 616 for (i=1; i<n; i++) { 617 v = aa + ai[i] + shift; 618 vi = aj + ai[i] + shift; 619 nz = a->diag[i] - ai[i]; 620 sum = b[*r++]; 621 while (nz--) sum -= *v++ * tmps[*vi++]; 622 tmp[i] = sum; 623 } 624 625 /* backward solve the upper triangular */ 626 for (i=n-1; i>=0; i--){ 627 v = aa + a->diag[i] + (!shift); 628 vi = aj + a->diag[i] + (!shift); 629 nz = ai[i+1] - a->diag[i] - 1; 630 sum = tmp[i]; 631 while (nz--) sum -= *v++ * tmps[*vi++]; 632 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 633 } 634 635 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 636 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 637 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 638 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 639 PLogFlops(2*a->nz - a->n); 640 PetscFunctionReturn(0); 641 } 642 643 /* ----------------------------------------------------------- */ 644 #undef __FUNC__ 645 #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering" 646 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 647 { 648 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 649 int n = a->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr; 650 Scalar *x,*b,*aa = a->a,sum; 651 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 652 int adiag_i,i,*vi,nz,ai_i; 653 Scalar *v; 654 #endif 655 656 PetscFunctionBegin; 657 if (!n) PetscFunctionReturn(0); 658 if (a->indexshift) { 659 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 660 PetscFunctionReturn(0); 661 } 662 663 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 664 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 665 666 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 667 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 668 #else 669 /* forward solve the lower triangular */ 670 x[0] = b[0]; 671 for (i=1; i<n; i++) { 672 ai_i = ai[i]; 673 v = aa + ai_i; 674 vi = aj + ai_i; 675 nz = adiag[i] - ai_i; 676 sum = b[i]; 677 while (nz--) sum -= *v++ * x[*vi++]; 678 x[i] = sum; 679 } 680 681 /* backward solve the upper triangular */ 682 for (i=n-1; i>=0; i--){ 683 adiag_i = adiag[i]; 684 v = aa + adiag_i + 1; 685 vi = aj + adiag_i + 1; 686 nz = ai[i+1] - adiag_i - 1; 687 sum = x[i]; 688 while (nz--) sum -= *v++ * x[*vi++]; 689 x[i] = sum*aa[adiag_i]; 690 } 691 #endif 692 PLogFlops(2*a->nz - a->n); 693 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 694 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 695 PetscFunctionReturn(0); 696 } 697 698 #undef __FUNC__ 699 #define __FUNC__ "MatSolveAdd_SeqAIJ" 700 int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 701 { 702 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 703 IS iscol = a->col,isrow = a->row; 704 int *r,*c,ierr,i, n = a->m,*vi,*ai = a->i,*aj = a->j; 705 int nz,shift = a->indexshift,*rout,*cout; 706 Scalar *x,*b,*tmp,*aa = a->a,sum,*v; 707 708 PetscFunctionBegin; 709 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 710 711 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 712 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 713 tmp = a->solve_work; 714 715 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 716 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 717 718 /* forward solve the lower triangular */ 719 tmp[0] = b[*r++]; 720 for (i=1; i<n; i++) { 721 v = aa + ai[i] + shift; 722 vi = aj + ai[i] + shift; 723 nz = a->diag[i] - ai[i]; 724 sum = b[*r++]; 725 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 726 tmp[i] = sum; 727 } 728 729 /* backward solve the upper triangular */ 730 for (i=n-1; i>=0; i--){ 731 v = aa + a->diag[i] + (!shift); 732 vi = aj + a->diag[i] + (!shift); 733 nz = ai[i+1] - a->diag[i] - 1; 734 sum = tmp[i]; 735 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 736 tmp[i] = sum*aa[a->diag[i]+shift]; 737 x[*c--] += tmp[i]; 738 } 739 740 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 741 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 742 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 743 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 744 PLogFlops(2*a->nz); 745 746 PetscFunctionReturn(0); 747 } 748 /* -------------------------------------------------------------------*/ 749 #undef __FUNC__ 750 #define __FUNC__ "MatSolveTranspose_SeqAIJ" 751 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 752 { 753 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 754 IS iscol = a->col,isrow = a->row; 755 int *r,*c,ierr,i,n = a->m,*vi,*ai = a->i,*aj = a->j; 756 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 757 Scalar *x,*b,*tmp,*aa = a->a,*v,s1; 758 759 PetscFunctionBegin; 760 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 761 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 762 tmp = a->solve_work; 763 764 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 765 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 766 767 /* copy the b into temp work space according to permutation */ 768 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 769 770 /* forward solve the U^T */ 771 for (i=0; i<n; i++) { 772 v = aa + diag[i] + shift; 773 vi = aj + diag[i] + (!shift); 774 nz = ai[i+1] - diag[i] - 1; 775 s1 = tmp[i]; 776 s1 *= *(v++); /* multiply by inverse of diagonal entry */ 777 while (nz--) { 778 tmp[*vi++ + shift] -= (*v++)*s1; 779 } 780 tmp[i] = s1; 781 } 782 783 /* backward solve the L^T */ 784 for (i=n-1; i>=0; i--){ 785 v = aa + diag[i] - 1 + shift; 786 vi = aj + diag[i] - 1 + shift; 787 nz = diag[i] - ai[i]; 788 s1 = tmp[i]; 789 while (nz--) { 790 tmp[*vi-- + shift] -= (*v--)*s1; 791 } 792 } 793 794 /* copy tmp into x according to permutation */ 795 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 796 797 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 798 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 799 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 800 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 801 802 PLogFlops(2*a->nz-a->n); 803 PetscFunctionReturn(0); 804 } 805 806 #undef __FUNC__ 807 #define __FUNC__ "MatSolveTransposeAdd_SeqAIJ" 808 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 809 { 810 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 811 IS iscol = a->col,isrow = a->row; 812 int *r,*c,ierr,i,n = a->m,*vi,*ai = a->i,*aj = a->j; 813 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 814 Scalar *x,*b,*tmp,*aa = a->a,*v; 815 816 PetscFunctionBegin; 817 if (zz != xx) VecCopy(zz,xx); 818 819 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 820 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 821 tmp = a->solve_work; 822 823 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 824 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 825 826 /* copy the b into temp work space according to permutation */ 827 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 828 829 /* forward solve the U^T */ 830 for (i=0; i<n; i++) { 831 v = aa + diag[i] + shift; 832 vi = aj + diag[i] + (!shift); 833 nz = ai[i+1] - diag[i] - 1; 834 tmp[i] *= *v++; 835 while (nz--) { 836 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 837 } 838 } 839 840 /* backward solve the L^T */ 841 for (i=n-1; i>=0; i--){ 842 v = aa + diag[i] - 1 + shift; 843 vi = aj + diag[i] - 1 + shift; 844 nz = diag[i] - ai[i]; 845 while (nz--) { 846 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 847 } 848 } 849 850 /* copy tmp into x according to permutation */ 851 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 852 853 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 854 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 855 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 856 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 857 858 PLogFlops(2*a->nz); 859 PetscFunctionReturn(0); 860 } 861 /* ----------------------------------------------------------------*/ 862 extern int MatMissingDiagonal_SeqAIJ(Mat); 863 864 #undef __FUNC__ 865 #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ" 866 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 867 { 868 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 869 IS isicol; 870 int *r,*ic,ierr,prow,n = a->m,*ai = a->i,*aj = a->j; 871 int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 872 int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0; 873 int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 874 PetscTruth col_identity,row_identity; 875 PetscReal f; 876 877 PetscFunctionBegin; 878 if (info) { 879 f = info->fill; 880 levels = (int)info->levels; 881 diagonal_fill = (int)info->diagonal_fill; 882 } else { 883 f = 1.0; 884 levels = 0; 885 diagonal_fill = 0; 886 } 887 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 888 889 /* special case that simply copies fill pattern */ 890 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 891 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 892 if (!levels && row_identity && col_identity) { 893 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 894 (*fact)->factor = FACTOR_LU; 895 b = (Mat_SeqAIJ*)(*fact)->data; 896 if (!b->diag) { 897 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 898 } 899 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 900 b->row = isrow; 901 b->col = iscol; 902 b->icol = isicol; 903 b->solve_work = (Scalar*)PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 904 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 905 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 906 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 907 PetscFunctionReturn(0); 908 } 909 910 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 911 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 912 913 /* get new row pointers */ 914 ainew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(ainew); 915 ainew[0] = -shift; 916 /* don't know how many column pointers are needed so estimate */ 917 jmax = (int)(f*(ai[n]+!shift)); 918 ajnew = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajnew); 919 /* ajfill is level of fill for each fill entry */ 920 ajfill = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajfill); 921 /* fill is a linked list of nonzeros in active row */ 922 fill = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(fill); 923 /* im is level for each filled value */ 924 im = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(im); 925 /* dloc is location of diagonal in factor */ 926 dloc = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(dloc); 927 dloc[0] = 0; 928 for (prow=0; prow<n; prow++) { 929 930 /* copy current row into linked list */ 931 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 932 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 933 xi = aj + ai[r[prow]] + shift; 934 fill[n] = n; 935 fill[prow] = -1; /* marker to indicate if diagonal exists */ 936 while (nz--) { 937 fm = n; 938 idx = ic[*xi++ + shift]; 939 do { 940 m = fm; 941 fm = fill[m]; 942 } while (fm < idx); 943 fill[m] = idx; 944 fill[idx] = fm; 945 im[idx] = 0; 946 } 947 948 /* make sure diagonal entry is included */ 949 if (diagonal_fill && fill[prow] == -1) { 950 fm = n; 951 while (fill[fm] < prow) fm = fill[fm]; 952 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 953 fill[fm] = prow; 954 im[prow] = 0; 955 nzf++; 956 dcount++; 957 } 958 959 nzi = 0; 960 row = fill[n]; 961 while (row < prow) { 962 incrlev = im[row] + 1; 963 nz = dloc[row]; 964 xi = ajnew + ainew[row] + shift + nz + 1; 965 flev = ajfill + ainew[row] + shift + nz + 1; 966 nnz = ainew[row+1] - ainew[row] - nz - 1; 967 fm = row; 968 while (nnz-- > 0) { 969 idx = *xi++ + shift; 970 if (*flev + incrlev > levels) { 971 flev++; 972 continue; 973 } 974 do { 975 m = fm; 976 fm = fill[m]; 977 } while (fm < idx); 978 if (fm != idx) { 979 im[idx] = *flev + incrlev; 980 fill[m] = idx; 981 fill[idx] = fm; 982 fm = idx; 983 nzf++; 984 } else { 985 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 986 } 987 flev++; 988 } 989 row = fill[row]; 990 nzi++; 991 } 992 /* copy new filled row into permanent storage */ 993 ainew[prow+1] = ainew[prow] + nzf; 994 if (ainew[prow+1] > jmax-shift) { 995 996 /* estimate how much additional space we will need */ 997 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 998 /* just double the memory each time */ 999 /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1000 int maxadd = jmax; 1001 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1002 jmax += maxadd; 1003 1004 /* allocate a longer ajnew and ajfill */ 1005 xi = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(xi); 1006 ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1007 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1008 ajnew = xi; 1009 xi = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(xi); 1010 ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1011 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1012 ajfill = xi; 1013 realloc++; /* count how many times we realloc */ 1014 } 1015 xi = ajnew + ainew[prow] + shift; 1016 flev = ajfill + ainew[prow] + shift; 1017 dloc[prow] = nzi; 1018 fm = fill[n]; 1019 while (nzf--) { 1020 *xi++ = fm - shift; 1021 *flev++ = im[fm]; 1022 fm = fill[fm]; 1023 } 1024 /* make sure row has diagonal entry */ 1025 if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 1026 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 1027 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1028 } 1029 } 1030 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1031 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1032 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1033 ierr = PetscFree(fill);CHKERRQ(ierr); 1034 ierr = PetscFree(im);CHKERRQ(ierr); 1035 1036 { 1037 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1038 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1039 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 1040 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 1041 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1042 if (diagonal_fill) { 1043 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount); 1044 } 1045 } 1046 1047 /* put together the new matrix */ 1048 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1049 PLogObjectParent(*fact,isicol); 1050 b = (Mat_SeqAIJ*)(*fact)->data; 1051 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1052 b->singlemalloc = PETSC_FALSE; 1053 len = (ainew[n] + shift)*sizeof(Scalar); 1054 /* the next line frees the default space generated by the Create() */ 1055 ierr = PetscFree(b->a);CHKERRQ(ierr); 1056 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1057 b->a = (Scalar*)PetscMalloc(len+1);CHKPTRQ(b->a); 1058 b->j = ajnew; 1059 b->i = ainew; 1060 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1061 b->diag = dloc; 1062 b->ilen = 0; 1063 b->imax = 0; 1064 b->row = isrow; 1065 b->col = iscol; 1066 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1067 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1068 b->icol = isicol; 1069 b->solve_work = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1070 /* In b structure: Free imax, ilen, old a, old j. 1071 Allocate dloc, solve_work, new a, new j */ 1072 PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 1073 b->maxnz = b->nz = ainew[n] + shift; 1074 (*fact)->factor = FACTOR_LU; 1075 1076 (*fact)->info.factor_mallocs = realloc; 1077 (*fact)->info.fill_ratio_given = f; 1078 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1079 (*fact)->factor = FACTOR_LU; 1080 1081 PetscFunctionReturn(0); 1082 } 1083 1084 1085 1086 1087