1 /*$Id: aijfact.c,v 1.152 2000/06/23 18:52:11 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 7 #undef __FUNC__ 8 #define __FUNC__ /*<a name=""></a>*/"MatOrdering_Flow_SeqAIJ" 9 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 10 { 11 PetscFunctionBegin; 12 13 SETERRQ(PETSC_ERR_SUP,0,"Code not written"); 14 #if !defined(PETSC_USE_DEBUG) 15 PetscFunctionReturn(0); 16 #endif 17 } 18 19 20 EXTERN int MatMarkDiagonal_SeqAIJ(Mat); 21 EXTERN int Mat_AIJ_CheckInode(Mat); 22 23 EXTERN int SPARSEKIT2dperm(int*,Scalar*,int*,int*,Scalar*,int*,int*,int*,int*,int*); 24 EXTERN int SPARSEKIT2ilutp(int*,Scalar*,int*,int*,int*,PetscReal*,PetscReal*,int*,Scalar*,int*,int*,int*,Scalar*,int*,int*,int*); 25 EXTERN int SPARSEKIT2msrcsr(int*,Scalar*,int*,Scalar*,int*,int*,Scalar*,int*); 26 27 #undef __FUNC__ 28 #define __FUNC__ /*<a name=""></a>*/"MatILUDTFactor_SeqAIJ" 29 /* ------------------------------------------------------------ 30 31 This interface was contribed by Tony Caola 32 33 This routine is an interface to the pivoting drop-tolerance 34 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 35 SPARSEKIT2. 36 37 The SPARSEKIT2 routines used here are covered by the GNU 38 copyright; see the file gnu in this directory. 39 40 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 41 help in getting this routine ironed out. 42 43 The major drawback to this routine is that if info->fill is 44 not large enough it fails rather than allocating more space; 45 this can be fixed by hacking/improving the f2c version of 46 Yousef Saad's code. 47 48 ishift = 0, for indices start at 1 49 ishift = 1, for indices starting at 0 50 ------------------------------------------------------------ 51 */ 52 53 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 54 { 55 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 56 IS iscolf,isicol,isirow; 57 PetscTruth reorder; 58 int *c,*r,*ic,ierr,i,n = a->m; 59 int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 60 int *ordcol,*iwk,*iperm,*jw; 61 int ishift = !a->indexshift; 62 int jmax,lfill,job,*o_i,*o_j; 63 Scalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 64 PetscReal permtol,af; 65 66 PetscFunctionBegin; 67 68 if (info->dt == PETSC_DEFAULT) info->dt = .005; 69 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax); 70 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 71 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*info->dtcount))/a->nz; 72 lfill = (int)(info->dtcount/2.0); 73 jmax = (int)(info->fill*a->nz); 74 permtol = info->dtcol; 75 76 77 /* ------------------------------------------------------------ 78 If reorder=.TRUE., then the original matrix has to be 79 reordered to reflect the user selected ordering scheme, and 80 then de-reordered so it is in it's original format. 81 Because Saad's dperm() is NOT in place, we have to copy 82 the original matrix and allocate more storage. . . 83 ------------------------------------------------------------ 84 */ 85 86 /* set reorder to true if either isrow or iscol is not identity */ 87 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 88 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 89 reorder = PetscNot(reorder); 90 91 92 /* storage for ilu factor */ 93 new_i = (int*) PetscMalloc((n+1)*sizeof(int));CHKPTRQ(new_i); 94 new_j = (int*) PetscMalloc(jmax*sizeof(int));CHKPTRQ(new_j); 95 new_a = (Scalar*)PetscMalloc(jmax*sizeof(Scalar));CHKPTRQ(new_a); 96 97 ordcol = (int*)PetscMalloc(n*sizeof(int));CHKPTRQ(ordcol); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 if (ishift) { 104 for (i=old_i[0];i<old_i[n];i++) { 105 old_j[i]++; 106 } 107 for(i=0;i<n+1;i++) { 108 old_i[i]++; 109 }; 110 }; 111 112 if (reorder) { 113 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 114 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 115 for(i=0;i<n;i++) { 116 r[i] = r[i]+1; 117 c[i] = c[i]+1; 118 } 119 old_i2 = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(old_i2); 120 old_j2 = (int*)PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int));CHKPTRQ(old_j2); 121 old_a2 = (Scalar*)PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar));CHKPTRQ(old_a2); 122 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 123 for (i=0;i<n;i++) { 124 r[i] = r[i]-1; 125 c[i] = c[i]-1; 126 } 127 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 128 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 iperm = (int*) PetscMalloc(2*n*sizeof(int));CHKPTRQ(iperm); 144 jw = (int*) PetscMalloc(2*n*sizeof(int));CHKPTRQ(jw); 145 w = (Scalar*)PetscMalloc(n*sizeof(Scalar));CHKPTRQ(w); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 148 if (ierr) { 149 switch (ierr) { 150 case -3: SETERRQ2(1,1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 151 case -2: SETERRQ2(1,1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 152 case -5: SETERRQ(1,1,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(1,1,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(1,1,"ilutp(), illegal info->fill value %d",jmax); 155 default: SETERRQ1(1,1,"ilutp(), zero pivot detected on row %d",ierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 wk = (Scalar*)PetscMalloc(n*sizeof(Scalar));CHKPTRQ(wk); 169 iwk = (int*) PetscMalloc((n+1)*sizeof(int));CHKPTRQ(iwk); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 if (ishift) { 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 } 196 197 /* Make the factored matrix 0-based */ 198 if (ishift) { 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 } 206 207 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 208 /*-- permute the right-hand-side and solution vectors --*/ 209 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 210 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 211 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 212 for(i=0; i<n; i++) { 213 ordcol[i] = ic[iperm[i]-1]; 214 }; 215 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 216 ierr = ISDestroy(isicol);CHKERRQ(ierr); 217 218 ierr = PetscFree(iperm);CHKERRQ(ierr); 219 220 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 221 ierr = PetscFree(ordcol);CHKERRQ(ierr); 222 223 /*----- put together the new matrix -----*/ 224 225 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 ierr = PetscFree(b->imax);CHKERRQ(ierr); 231 b->sorted = PETSC_FALSE; 232 b->singlemalloc = PETSC_FALSE; 233 /* the next line frees the default space generated by the MatCreate() */ 234 ierr = PetscFree(b->a);CHKERRQ(ierr); 235 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 236 b->a = new_a; 237 b->j = new_j; 238 b->i = new_i; 239 b->ilen = 0; 240 b->imax = 0; 241 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 242 b->row = isirow; 243 b->col = iscolf; 244 b->solve_work = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 245 b->maxnz = b->nz = new_i[n]; 246 b->indexshift = a->indexshift; 247 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 248 (*fact)->info.factor_mallocs = 0; 249 250 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 251 252 /* check out for identical nodes. If found, use inode functions */ 253 ierr = Mat_AIJ_CheckInode(*fact);CHKERRQ(ierr); 254 255 af = ((double)b->nz)/((double)a->nz) + .001; 256 PLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 257 PLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 258 PLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 259 PLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 260 261 PetscFunctionReturn(0); 262 } 263 264 /* 265 Factorization code for AIJ format. 266 */ 267 #undef __FUNC__ 268 #define __FUNC__ /*<a name="MatLUFactorSymbolic_SeqAIJ""></a>*/"MatLUFactorSymbolic_SeqAIJ" 269 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatLUInfo *info,Mat *B) 270 { 271 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 272 IS isicol; 273 int *r,*ic,ierr,i,n = a->m,*ai = a->i,*aj = a->j; 274 int *ainew,*ajnew,jmax,*fill,*ajtmp,nz,shift = a->indexshift; 275 int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 276 PetscReal f; 277 278 PetscFunctionBegin; 279 PetscValidHeaderSpecific(isrow,IS_COOKIE); 280 PetscValidHeaderSpecific(iscol,IS_COOKIE); 281 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square"); 282 283 if (!isrow) { 284 ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);CHKERRQ(ierr); 285 } 286 if (!iscol) { 287 ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);CHKERRQ(ierr); 288 } 289 290 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 291 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 292 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 293 294 /* get new row pointers */ 295 ainew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(ainew); 296 ainew[0] = -shift; 297 /* don't know how many column pointers are needed so estimate */ 298 if (info) f = info->fill; else f = 1.0; 299 jmax = (int)(f*ai[n]+(!shift)); 300 ajnew = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajnew); 301 /* fill is a linked list of nonzeros in active row */ 302 fill = (int*)PetscMalloc((2*n+1)*sizeof(int));CHKPTRQ(fill); 303 im = fill + n + 1; 304 /* idnew is location of diagonal in factor */ 305 idnew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(idnew); 306 idnew[0] = -shift; 307 308 for (i=0; i<n; i++) { 309 /* first copy previous fill into linked list */ 310 nnz = nz = ai[r[i]+1] - ai[r[i]]; 311 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 312 ajtmp = aj + ai[r[i]] + shift; 313 fill[n] = n; 314 while (nz--) { 315 fm = n; 316 idx = ic[*ajtmp++ + shift]; 317 do { 318 m = fm; 319 fm = fill[m]; 320 } while (fm < idx); 321 fill[m] = idx; 322 fill[idx] = fm; 323 } 324 row = fill[n]; 325 while (row < i) { 326 ajtmp = ajnew + idnew[row] + (!shift); 327 nzbd = 1 + idnew[row] - ainew[row]; 328 nz = im[row] - nzbd; 329 fm = row; 330 while (nz-- > 0) { 331 idx = *ajtmp++ + shift; 332 nzbd++; 333 if (idx == i) im[row] = nzbd; 334 do { 335 m = fm; 336 fm = fill[m]; 337 } while (fm < idx); 338 if (fm != idx) { 339 fill[m] = idx; 340 fill[idx] = fm; 341 fm = idx; 342 nnz++; 343 } 344 } 345 row = fill[row]; 346 } 347 /* copy new filled row into permanent storage */ 348 ainew[i+1] = ainew[i] + nnz; 349 if (ainew[i+1] > jmax) { 350 351 /* estimate how much additional space we will need */ 352 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 353 /* just double the memory each time */ 354 int maxadd = jmax; 355 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 356 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 357 jmax += maxadd; 358 359 /* allocate a longer ajnew */ 360 ajtmp = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(ajtmp); 361 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 362 ierr = PetscFree(ajnew);CHKERRQ(ierr); 363 ajnew = ajtmp; 364 realloc++; /* count how many times we realloc */ 365 } 366 ajtmp = ajnew + ainew[i] + shift; 367 fm = fill[n]; 368 nzi = 0; 369 im[i] = nnz; 370 while (nnz--) { 371 if (fm < i) nzi++; 372 *ajtmp++ = fm - shift; 373 fm = fill[fm]; 374 } 375 idnew[i] = ainew[i] + nzi; 376 } 377 if (ai[n] != 0) { 378 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 379 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 380 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 381 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 382 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 383 } else { 384 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 385 } 386 387 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 388 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 389 390 ierr = PetscFree(fill);CHKERRQ(ierr); 391 392 /* put together the new matrix */ 393 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 394 PLogObjectParent(*B,isicol); 395 b = (Mat_SeqAIJ*)(*B)->data; 396 ierr = PetscFree(b->imax);CHKERRQ(ierr); 397 b->singlemalloc = PETSC_FALSE; 398 /* the next line frees the default space generated by the Create() */ 399 ierr = PetscFree(b->a);CHKERRQ(ierr); 400 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 401 b->a = (Scalar*)PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a); 402 b->j = ajnew; 403 b->i = ainew; 404 b->diag = idnew; 405 b->ilen = 0; 406 b->imax = 0; 407 b->row = isrow; 408 b->col = iscol; 409 if (info) { 410 b->lu_damp = info->damp; 411 b->lu_damping = info->damping; 412 } else { 413 b->lu_damp = PETSC_FALSE; 414 b->lu_damping = 0.0; 415 } 416 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 417 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 418 b->icol = isicol; 419 b->solve_work = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 420 /* In b structure: Free imax, ilen, old a, old j. 421 Allocate idnew, solve_work, new a, new j */ 422 PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 423 b->maxnz = b->nz = ainew[n] + shift; 424 425 (*B)->factor = FACTOR_LU; 426 (*B)->info.factor_mallocs = realloc; 427 (*B)->info.fill_ratio_given = f; 428 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 429 430 if (ai[n] != 0) { 431 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 432 } else { 433 (*B)->info.fill_ratio_needed = 0.0; 434 } 435 PetscFunctionReturn(0); 436 } 437 /* ----------------------------------------------------------- */ 438 EXTERN int Mat_AIJ_CheckInode(Mat); 439 440 #undef __FUNC__ 441 #define __FUNC__ /*<a name="MatLUFactorNumeric_SeqAIJ"></a>*/"MatLUFactorNumeric_SeqAIJ" 442 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 443 { 444 Mat C = *B; 445 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 446 IS isrow = b->row,isicol = b->icol; 447 int *r,*ic,ierr,i,j,n = a->m,*ai = b->i,*aj = b->j; 448 int *ajtmpold,*ajtmp,nz,row,*ics,shift = a->indexshift; 449 int *diag_offset = b->diag,diag; 450 int *pj,ndamp = 0; 451 Scalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 452 PetscReal damping = b->lu_damping; 453 PetscTruth damp; 454 455 PetscFunctionBegin; 456 457 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 458 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 459 rtmp = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(rtmp); 460 ierr = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr); 461 rtmps = rtmp + shift; ics = ic + shift; 462 463 do { 464 damp = PETSC_FALSE; 465 for (i=0; i<n; i++) { 466 nz = ai[i+1] - ai[i]; 467 ajtmp = aj + ai[i] + shift; 468 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 469 470 /* load in initial (unfactored row) */ 471 nz = a->i[r[i]+1] - a->i[r[i]]; 472 ajtmpold = a->j + a->i[r[i]] + shift; 473 v = a->a + a->i[r[i]] + shift; 474 for (j=0; j<nz; j++) { 475 rtmp[ics[ajtmpold[j]]] = v[j]; 476 if (ajtmpold[j] == r[i]) { /* damp the diagonal of the matrix */ 477 rtmp[ics[ajtmpold[j]]] += damping; 478 } 479 } 480 481 row = *ajtmp++ + shift; 482 while (row < i) { 483 pc = rtmp + row; 484 if (*pc != 0.0) { 485 pv = b->a + diag_offset[row] + shift; 486 pj = b->j + diag_offset[row] + (!shift); 487 multiplier = *pc / *pv++; 488 *pc = multiplier; 489 nz = ai[row+1] - diag_offset[row] - 1; 490 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 491 PLogFlops(2*nz); 492 } 493 row = *ajtmp++ + shift; 494 } 495 /* finished row so stick it into b->a */ 496 pv = b->a + ai[i] + shift; 497 pj = b->j + ai[i] + shift; 498 nz = ai[i+1] - ai[i]; 499 for (j=0; j<nz; j++) {pv[j] = rtmps[pj[j]];} 500 diag = diag_offset[i] - ai[i]; 501 if (PetscAbsScalar(pv[diag]) < 1.e-12) { 502 if (b->lu_damp) { 503 damp = PETSC_TRUE; 504 if (damping) damping *= 2.0; 505 else damping = 1.e-12; 506 ndamp++; 507 break; 508 } else { 509 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i); 510 } 511 } 512 } 513 } while (damp); 514 515 /* invert diagonal entries for simplier triangular solves */ 516 for (i=0; i<n; i++) { 517 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 518 } 519 520 ierr = PetscFree(rtmp);CHKERRQ(ierr); 521 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 522 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 523 C->factor = FACTOR_LU; 524 ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr); 525 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 526 C->assembled = PETSC_TRUE; 527 PLogFlops(b->n); 528 if (ndamp || b->lu_damping) { 529 PLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %g\n",ndamp,damping); 530 } 531 PetscFunctionReturn(0); 532 } 533 /* ----------------------------------------------------------- */ 534 #undef __FUNC__ 535 #define __FUNC__ /*<a name="MatLUFactor_SeqAIJ"></a>*/"MatLUFactor_SeqAIJ" 536 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatLUInfo *info) 537 { 538 Mat_SeqAIJ *mat = (Mat_SeqAIJ*)A->data; 539 int ierr,refct; 540 Mat C; 541 PetscOps *Abops; 542 MatOps Aops; 543 544 PetscFunctionBegin; 545 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 546 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 547 548 /* free all the data structures from mat */ 549 ierr = PetscFree(mat->a);CHKERRQ(ierr); 550 if (!mat->singlemalloc) { 551 ierr = PetscFree(mat->i);CHKERRQ(ierr); 552 ierr = PetscFree(mat->j);CHKERRQ(ierr); 553 } 554 if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);} 555 if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);} 556 if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);} 557 if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);} 558 if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);} 559 if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);} 560 ierr = PetscFree(mat);CHKERRQ(ierr); 561 562 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 563 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 564 565 /* 566 This is horrible, horrible code. We need to keep the 567 A pointers for the bops and ops but copy everything 568 else from C. 569 */ 570 Abops = A->bops; 571 Aops = A->ops; 572 refct = A->refct; 573 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 574 mat = (Mat_SeqAIJ*)A->data; 575 PLogObjectParent(A,mat->icol); 576 577 A->bops = Abops; 578 A->ops = Aops; 579 A->qlist = 0; 580 A->refct = refct; 581 /* copy over the type_name and name */ 582 ierr = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr); 583 ierr = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr); 584 585 PetscHeaderDestroy(C); 586 587 PetscFunctionReturn(0); 588 } 589 /* ----------------------------------------------------------- */ 590 #undef __FUNC__ 591 #define __FUNC__ /*<a name=""></a>*/"MatSolve_SeqAIJ" 592 int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 593 { 594 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 595 IS iscol = a->col,isrow = a->row; 596 int *r,*c,ierr,i, n = a->m,*vi,*ai = a->i,*aj = a->j; 597 int nz,shift = a->indexshift,*rout,*cout; 598 Scalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 599 600 PetscFunctionBegin; 601 if (!n) PetscFunctionReturn(0); 602 603 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 604 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 605 tmp = a->solve_work; 606 607 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 608 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 609 610 /* forward solve the lower triangular */ 611 tmp[0] = b[*r++]; 612 tmps = tmp + shift; 613 for (i=1; i<n; i++) { 614 v = aa + ai[i] + shift; 615 vi = aj + ai[i] + shift; 616 nz = a->diag[i] - ai[i]; 617 sum = b[*r++]; 618 while (nz--) sum -= *v++ * tmps[*vi++]; 619 tmp[i] = sum; 620 } 621 622 /* backward solve the upper triangular */ 623 for (i=n-1; i>=0; i--){ 624 v = aa + a->diag[i] + (!shift); 625 vi = aj + a->diag[i] + (!shift); 626 nz = ai[i+1] - a->diag[i] - 1; 627 sum = tmp[i]; 628 while (nz--) sum -= *v++ * tmps[*vi++]; 629 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 630 } 631 632 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 633 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 634 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 635 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 636 PLogFlops(2*a->nz - a->n); 637 PetscFunctionReturn(0); 638 } 639 640 /* ----------------------------------------------------------- */ 641 #undef __FUNC__ 642 #define __FUNC__ /*<a name=""></a>*/"MatSolve_SeqAIJ_NaturalOrdering" 643 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 644 { 645 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 646 int n = a->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr; 647 Scalar *x,*b,*aa = a->a,sum; 648 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 649 int adiag_i,i,*vi,nz,ai_i; 650 Scalar *v; 651 #endif 652 653 PetscFunctionBegin; 654 if (!n) PetscFunctionReturn(0); 655 if (a->indexshift) { 656 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 657 PetscFunctionReturn(0); 658 } 659 660 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 661 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 662 663 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 664 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 665 #else 666 /* forward solve the lower triangular */ 667 x[0] = b[0]; 668 for (i=1; i<n; i++) { 669 ai_i = ai[i]; 670 v = aa + ai_i; 671 vi = aj + ai_i; 672 nz = adiag[i] - ai_i; 673 sum = b[i]; 674 while (nz--) sum -= *v++ * x[*vi++]; 675 x[i] = sum; 676 } 677 678 /* backward solve the upper triangular */ 679 for (i=n-1; i>=0; i--){ 680 adiag_i = adiag[i]; 681 v = aa + adiag_i + 1; 682 vi = aj + adiag_i + 1; 683 nz = ai[i+1] - adiag_i - 1; 684 sum = x[i]; 685 while (nz--) sum -= *v++ * x[*vi++]; 686 x[i] = sum*aa[adiag_i]; 687 } 688 #endif 689 PLogFlops(2*a->nz - a->n); 690 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 691 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNC__ 696 #define __FUNC__ /*<a name=""></a>*/"MatSolveAdd_SeqAIJ" 697 int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 698 { 699 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 700 IS iscol = a->col,isrow = a->row; 701 int *r,*c,ierr,i, n = a->m,*vi,*ai = a->i,*aj = a->j; 702 int nz,shift = a->indexshift,*rout,*cout; 703 Scalar *x,*b,*tmp,*aa = a->a,sum,*v; 704 705 PetscFunctionBegin; 706 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 707 708 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 709 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 710 tmp = a->solve_work; 711 712 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 713 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 714 715 /* forward solve the lower triangular */ 716 tmp[0] = b[*r++]; 717 for (i=1; i<n; i++) { 718 v = aa + ai[i] + shift; 719 vi = aj + ai[i] + shift; 720 nz = a->diag[i] - ai[i]; 721 sum = b[*r++]; 722 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 723 tmp[i] = sum; 724 } 725 726 /* backward solve the upper triangular */ 727 for (i=n-1; i>=0; i--){ 728 v = aa + a->diag[i] + (!shift); 729 vi = aj + a->diag[i] + (!shift); 730 nz = ai[i+1] - a->diag[i] - 1; 731 sum = tmp[i]; 732 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 733 tmp[i] = sum*aa[a->diag[i]+shift]; 734 x[*c--] += tmp[i]; 735 } 736 737 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 738 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 739 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 740 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 741 PLogFlops(2*a->nz); 742 743 PetscFunctionReturn(0); 744 } 745 /* -------------------------------------------------------------------*/ 746 #undef __FUNC__ 747 #define __FUNC__ /*<a name=""></a>*/"MatSolveTranspose_SeqAIJ" 748 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 749 { 750 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 751 IS iscol = a->col,isrow = a->row; 752 int *r,*c,ierr,i,n = a->m,*vi,*ai = a->i,*aj = a->j; 753 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 754 Scalar *x,*b,*tmp,*aa = a->a,*v,s1; 755 756 PetscFunctionBegin; 757 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 758 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 759 tmp = a->solve_work; 760 761 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 762 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 763 764 /* copy the b into temp work space according to permutation */ 765 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 766 767 /* forward solve the U^T */ 768 for (i=0; i<n; i++) { 769 v = aa + diag[i] + shift; 770 vi = aj + diag[i] + (!shift); 771 nz = ai[i+1] - diag[i] - 1; 772 s1 = tmp[i]; 773 s1 *= *(v++); /* multiply by inverse of diagonal entry */ 774 while (nz--) { 775 tmp[*vi++ + shift] -= (*v++)*s1; 776 } 777 tmp[i] = s1; 778 } 779 780 /* backward solve the L^T */ 781 for (i=n-1; i>=0; i--){ 782 v = aa + diag[i] - 1 + shift; 783 vi = aj + diag[i] - 1 + shift; 784 nz = diag[i] - ai[i]; 785 s1 = tmp[i]; 786 while (nz--) { 787 tmp[*vi-- + shift] -= (*v--)*s1; 788 } 789 } 790 791 /* copy tmp into x according to permutation */ 792 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 793 794 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 795 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 796 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 797 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 798 799 PLogFlops(2*a->nz-a->n); 800 PetscFunctionReturn(0); 801 } 802 803 #undef __FUNC__ 804 #define __FUNC__ /*<a name=""></a>*/"MatSolveTransposeAdd_SeqAIJ" 805 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 806 { 807 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 808 IS iscol = a->col,isrow = a->row; 809 int *r,*c,ierr,i,n = a->m,*vi,*ai = a->i,*aj = a->j; 810 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 811 Scalar *x,*b,*tmp,*aa = a->a,*v; 812 813 PetscFunctionBegin; 814 if (zz != xx) VecCopy(zz,xx); 815 816 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 817 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 818 tmp = a->solve_work; 819 820 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 821 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 822 823 /* copy the b into temp work space according to permutation */ 824 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 825 826 /* forward solve the U^T */ 827 for (i=0; i<n; i++) { 828 v = aa + diag[i] + shift; 829 vi = aj + diag[i] + (!shift); 830 nz = ai[i+1] - diag[i] - 1; 831 tmp[i] *= *v++; 832 while (nz--) { 833 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 834 } 835 } 836 837 /* backward solve the L^T */ 838 for (i=n-1; i>=0; i--){ 839 v = aa + diag[i] - 1 + shift; 840 vi = aj + diag[i] - 1 + shift; 841 nz = diag[i] - ai[i]; 842 while (nz--) { 843 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 844 } 845 } 846 847 /* copy tmp into x according to permutation */ 848 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 849 850 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 851 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 852 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 853 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 854 855 PLogFlops(2*a->nz); 856 PetscFunctionReturn(0); 857 } 858 /* ----------------------------------------------------------------*/ 859 EXTERN int MatMissingDiagonal_SeqAIJ(Mat); 860 861 #undef __FUNC__ 862 #define __FUNC__ /*<a name=""></a>*/"MatILUFactorSymbolic_SeqAIJ" 863 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 864 { 865 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 866 IS isicol; 867 int *r,*ic,ierr,prow,n = a->m,*ai = a->i,*aj = a->j; 868 int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 869 int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0; 870 int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 871 PetscTruth col_identity,row_identity; 872 PetscReal f; 873 874 PetscFunctionBegin; 875 if (info) { 876 f = info->fill; 877 levels = (int)info->levels; 878 diagonal_fill = (int)info->diagonal_fill; 879 } else { 880 f = 1.0; 881 levels = 0; 882 diagonal_fill = 0; 883 } 884 if (!isrow) { 885 ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);CHKERRQ(ierr); 886 } 887 if (!iscol) { 888 ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);CHKERRQ(ierr); 889 } 890 891 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 892 893 /* special case that simply copies fill pattern */ 894 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 895 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 896 if (!levels && row_identity && col_identity) { 897 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 898 (*fact)->factor = FACTOR_LU; 899 b = (Mat_SeqAIJ*)(*fact)->data; 900 if (!b->diag) { 901 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 902 } 903 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 904 b->row = isrow; 905 b->col = iscol; 906 b->icol = isicol; 907 if (info) { 908 b->lu_damp = info->damp; 909 b->lu_damping = info->damping; 910 } else { 911 b->lu_damp = PETSC_FALSE; 912 b->lu_damping = 0.0; 913 } 914 b->solve_work = (Scalar*)PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 915 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 916 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 917 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 918 PetscFunctionReturn(0); 919 } 920 921 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 922 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 923 924 /* get new row pointers */ 925 ainew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(ainew); 926 ainew[0] = -shift; 927 /* don't know how many column pointers are needed so estimate */ 928 jmax = (int)(f*(ai[n]+!shift)); 929 ajnew = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajnew); 930 /* ajfill is level of fill for each fill entry */ 931 ajfill = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajfill); 932 /* fill is a linked list of nonzeros in active row */ 933 fill = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(fill); 934 /* im is level for each filled value */ 935 im = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(im); 936 /* dloc is location of diagonal in factor */ 937 dloc = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(dloc); 938 dloc[0] = 0; 939 for (prow=0; prow<n; prow++) { 940 941 /* copy current row into linked list */ 942 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 943 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 944 xi = aj + ai[r[prow]] + shift; 945 fill[n] = n; 946 fill[prow] = -1; /* marker to indicate if diagonal exists */ 947 while (nz--) { 948 fm = n; 949 idx = ic[*xi++ + shift]; 950 do { 951 m = fm; 952 fm = fill[m]; 953 } while (fm < idx); 954 fill[m] = idx; 955 fill[idx] = fm; 956 im[idx] = 0; 957 } 958 959 /* make sure diagonal entry is included */ 960 if (diagonal_fill && fill[prow] == -1) { 961 fm = n; 962 while (fill[fm] < prow) fm = fill[fm]; 963 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 964 fill[fm] = prow; 965 im[prow] = 0; 966 nzf++; 967 dcount++; 968 } 969 970 nzi = 0; 971 row = fill[n]; 972 while (row < prow) { 973 incrlev = im[row] + 1; 974 nz = dloc[row]; 975 xi = ajnew + ainew[row] + shift + nz + 1; 976 flev = ajfill + ainew[row] + shift + nz + 1; 977 nnz = ainew[row+1] - ainew[row] - nz - 1; 978 fm = row; 979 while (nnz-- > 0) { 980 idx = *xi++ + shift; 981 if (*flev + incrlev > levels) { 982 flev++; 983 continue; 984 } 985 do { 986 m = fm; 987 fm = fill[m]; 988 } while (fm < idx); 989 if (fm != idx) { 990 im[idx] = *flev + incrlev; 991 fill[m] = idx; 992 fill[idx] = fm; 993 fm = idx; 994 nzf++; 995 } else { 996 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 997 } 998 flev++; 999 } 1000 row = fill[row]; 1001 nzi++; 1002 } 1003 /* copy new filled row into permanent storage */ 1004 ainew[prow+1] = ainew[prow] + nzf; 1005 if (ainew[prow+1] > jmax-shift) { 1006 1007 /* estimate how much additional space we will need */ 1008 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1009 /* just double the memory each time */ 1010 /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1011 int maxadd = jmax; 1012 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1013 jmax += maxadd; 1014 1015 /* allocate a longer ajnew and ajfill */ 1016 xi = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(xi); 1017 ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1018 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1019 ajnew = xi; 1020 xi = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(xi); 1021 ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1022 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1023 ajfill = xi; 1024 realloc++; /* count how many times we realloc */ 1025 } 1026 xi = ajnew + ainew[prow] + shift; 1027 flev = ajfill + ainew[prow] + shift; 1028 dloc[prow] = nzi; 1029 fm = fill[n]; 1030 while (nzf--) { 1031 *xi++ = fm - shift; 1032 *flev++ = im[fm]; 1033 fm = fill[fm]; 1034 } 1035 /* make sure row has diagonal entry */ 1036 if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 1037 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 1038 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1039 } 1040 } 1041 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1042 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1043 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1044 ierr = PetscFree(fill);CHKERRQ(ierr); 1045 ierr = PetscFree(im);CHKERRQ(ierr); 1046 1047 { 1048 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1049 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1050 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 1051 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 1052 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1053 if (diagonal_fill) { 1054 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount); 1055 } 1056 } 1057 1058 /* put together the new matrix */ 1059 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1060 PLogObjectParent(*fact,isicol); 1061 b = (Mat_SeqAIJ*)(*fact)->data; 1062 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1063 b->singlemalloc = PETSC_FALSE; 1064 len = (ainew[n] + shift)*sizeof(Scalar); 1065 /* the next line frees the default space generated by the Create() */ 1066 ierr = PetscFree(b->a);CHKERRQ(ierr); 1067 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1068 b->a = (Scalar*)PetscMalloc(len+1);CHKPTRQ(b->a); 1069 b->j = ajnew; 1070 b->i = ainew; 1071 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1072 b->diag = dloc; 1073 b->ilen = 0; 1074 b->imax = 0; 1075 b->row = isrow; 1076 b->col = iscol; 1077 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1078 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1079 b->icol = isicol; 1080 b->solve_work = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1081 /* In b structure: Free imax, ilen, old a, old j. 1082 Allocate dloc, solve_work, new a, new j */ 1083 PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 1084 b->maxnz = b->nz = ainew[n] + shift; 1085 if (info) { 1086 b->lu_damp = info->damp; 1087 b->lu_damping = info->damping; 1088 } else { 1089 b->lu_damp = PETSC_FALSE; 1090 b->lu_damping = 0.0; 1091 } 1092 (*fact)->factor = FACTOR_LU; 1093 1094 (*fact)->info.factor_mallocs = realloc; 1095 (*fact)->info.fill_ratio_given = f; 1096 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1097 (*fact)->factor = FACTOR_LU; 1098 1099 PetscFunctionReturn(0); 1100 } 1101 1102 1103 1104 1105