1 /*$Id: aijfact.c,v 1.146 2000/03/14 17:29:40 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 7 #undef __FUNC__ 8 #define __FUNC__ "MatOrdering_Flow_SeqAIJ" 9 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 10 { 11 PetscFunctionBegin; 12 13 SETERRQ(PETSC_ERR_SUP,0,"Code not written"); 14 #if !defined(PETSC_USE_DEBUG) 15 PetscFunctionReturn(0); 16 #endif 17 } 18 19 20 extern int MatMarkDiagonal_SeqAIJ(Mat); 21 extern int Mat_AIJ_CheckInode(Mat); 22 23 extern int SPARSEKIT2dperm(int*,Scalar*,int*,int*,Scalar*,int*,int*,int*,int*,int*); 24 extern int SPARSEKIT2ilutp(int*,Scalar*,int*,int*,int*,PetscReal*,PetscReal*,int*,Scalar*,int*,int*,int*,Scalar*,int*,int*,int*); 25 extern int SPARSEKIT2msrcsr(int*,Scalar*,int*,Scalar*,int*,int*,Scalar*,int*); 26 27 #undef __FUNC__ 28 #define __FUNC__ "MatILUDTFactor_SeqAIJ" 29 /* ------------------------------------------------------------ 30 31 This interface was contribed by Tony Caola 32 33 This routine is an interface to the pivoting drop-tolerance 34 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 35 SPARSEKIT2. 36 37 The SPARSEKIT2 routines used here are covered by the GNU 38 copyright; see the file gnu in this directory. 39 40 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 41 help in getting this routine ironed out. 42 43 The major drawback to this routine is that if info->fill is 44 not large enough it fails rather than allocating more space; 45 this can be fixed by hacking/improving the f2c version of 46 Yousef Saad's code. 47 48 ishift = 0, for indices start at 1 49 ishift = 1, for indices starting at 0 50 ------------------------------------------------------------ 51 */ 52 53 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 54 { 55 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 56 IS iscolf,isicol,isirow; 57 PetscTruth reorder; 58 int *c,*r,*ic,ierr,i,n = a->m; 59 int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 60 int *ordcol,*iwk,*iperm,*jw; 61 int ishift = !a->indexshift; 62 int jmax,lfill,job,*o_i,*o_j; 63 Scalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 64 PetscReal permtol,af; 65 66 PetscFunctionBegin; 67 68 if (info->dt == PETSC_DEFAULT) info->dt = .005; 69 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax); 70 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 71 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*info->dtcount))/a->nz; 72 lfill = (int)(info->dtcount/2.0); 73 jmax = (int)(info->fill*a->nz); 74 permtol = info->dtcol; 75 76 77 /* ------------------------------------------------------------ 78 If reorder=.TRUE., then the original matrix has to be 79 reordered to reflect the user selected ordering scheme, and 80 then de-reordered so it is in it's original format. 81 Because Saad's dperm() is NOT in place, we have to copy 82 the original matrix and allocate more storage. . . 83 ------------------------------------------------------------ 84 */ 85 86 /* set reorder to true if either isrow or iscol is not identity */ 87 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 88 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 89 reorder = PetscNot(reorder); 90 91 92 /* storage for ilu factor */ 93 new_i = (int*) PetscMalloc((n+1)*sizeof(int));CHKPTRQ(new_i); 94 new_j = (int*) PetscMalloc(jmax*sizeof(int));CHKPTRQ(new_j); 95 new_a = (Scalar*)PetscMalloc(jmax*sizeof(Scalar));CHKPTRQ(new_a); 96 97 ordcol = (int*)PetscMalloc(n*sizeof(int));CHKPTRQ(ordcol); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 if (ishift) { 104 for (i=old_i[0];i<old_i[n];i++) { 105 old_j[i]++; 106 } 107 for(i=0;i<n+1;i++) { 108 old_i[i]++; 109 }; 110 }; 111 112 if (reorder) { 113 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 114 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 115 for(i=0;i<n;i++) { 116 r[i] = r[i]+1; 117 c[i] = c[i]+1; 118 } 119 old_i2 = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(old_i2); 120 old_j2 = (int*)PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int));CHKPTRQ(old_j2); 121 old_a2 = (Scalar*)PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar));CHKPTRQ(old_a2); 122 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 123 for (i=0;i<n;i++) { 124 r[i] = r[i]-1; 125 c[i] = c[i]-1; 126 } 127 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 128 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 iperm = (int*) PetscMalloc(2*n*sizeof(int));CHKPTRQ(iperm); 144 jw = (int*) PetscMalloc(2*n*sizeof(int));CHKPTRQ(jw); 145 w = (Scalar*)PetscMalloc(n*sizeof(Scalar));CHKPTRQ(w); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 148 if (ierr) { 149 switch (ierr) { 150 case -3: SETERRQ2(1,1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 151 case -2: SETERRQ2(1,1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 152 case -5: SETERRQ(1,1,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(1,1,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(1,1,"ilutp(), illegal info->fill value %d",jmax); 155 default: SETERRQ1(1,1,"ilutp(), zero pivot detected on row %d",ierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 wk = (Scalar*)PetscMalloc(n*sizeof(Scalar));CHKPTRQ(wk); 169 iwk = (int*) PetscMalloc((n+1)*sizeof(int));CHKPTRQ(iwk); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 if (ishift) { 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 } 196 197 /* Make the factored matrix 0-based */ 198 if (ishift) { 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 } 206 207 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 208 /*-- permute the right-hand-side and solution vectors --*/ 209 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 210 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 211 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 212 for(i=0; i<n; i++) { 213 ordcol[i] = ic[iperm[i]-1]; 214 }; 215 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 216 ierr = ISDestroy(isicol);CHKERRQ(ierr); 217 218 ierr = PetscFree(iperm);CHKERRQ(ierr); 219 220 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 221 ierr = PetscFree(ordcol);CHKERRQ(ierr); 222 223 /*----- put together the new matrix -----*/ 224 225 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 ierr = PetscFree(b->imax);CHKERRQ(ierr); 231 b->sorted = PETSC_FALSE; 232 b->singlemalloc = PETSC_FALSE; 233 /* the next line frees the default space generated by the MatCreate() */ 234 ierr = PetscFree(b->a);CHKERRQ(ierr); 235 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 236 b->a = new_a; 237 b->j = new_j; 238 b->i = new_i; 239 b->ilen = 0; 240 b->imax = 0; 241 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 242 b->row = isirow; 243 b->col = iscolf; 244 b->solve_work = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 245 b->maxnz = b->nz = new_i[n]; 246 b->indexshift = a->indexshift; 247 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 248 (*fact)->info.factor_mallocs = 0; 249 250 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 251 252 /* check out for identical nodes. If found, use inode functions */ 253 ierr = Mat_AIJ_CheckInode(*fact);CHKERRQ(ierr); 254 255 af = ((double)b->nz)/((double)a->nz) + .001; 256 PLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 257 PLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 258 PLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 259 PLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 260 261 PetscFunctionReturn(0); 262 } 263 264 /* 265 Factorization code for AIJ format. 266 */ 267 #undef __FUNC__ 268 #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ" 269 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,PetscReal f,Mat *B) 270 { 271 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 272 IS isicol; 273 int *r,*ic,ierr,i,n = a->m,*ai = a->i,*aj = a->j; 274 int *ainew,*ajnew,jmax,*fill,*ajtmp,nz,shift = a->indexshift; 275 int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 276 277 PetscFunctionBegin; 278 PetscValidHeaderSpecific(isrow,IS_COOKIE); 279 PetscValidHeaderSpecific(iscol,IS_COOKIE); 280 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square"); 281 282 if (!isrow) { 283 ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);CHKERRQ(ierr); 284 } 285 if (!iscol) { 286 ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);CHKERRQ(ierr); 287 } 288 289 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 290 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 291 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 292 293 /* get new row pointers */ 294 ainew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(ainew); 295 ainew[0] = -shift; 296 /* don't know how many column pointers are needed so estimate */ 297 jmax = (int)(f*ai[n]+(!shift)); 298 ajnew = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajnew); 299 /* fill is a linked list of nonzeros in active row */ 300 fill = (int*)PetscMalloc((2*n+1)*sizeof(int));CHKPTRQ(fill); 301 im = fill + n + 1; 302 /* idnew is location of diagonal in factor */ 303 idnew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(idnew); 304 idnew[0] = -shift; 305 306 for (i=0; i<n; i++) { 307 /* first copy previous fill into linked list */ 308 nnz = nz = ai[r[i]+1] - ai[r[i]]; 309 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 310 ajtmp = aj + ai[r[i]] + shift; 311 fill[n] = n; 312 while (nz--) { 313 fm = n; 314 idx = ic[*ajtmp++ + shift]; 315 do { 316 m = fm; 317 fm = fill[m]; 318 } while (fm < idx); 319 fill[m] = idx; 320 fill[idx] = fm; 321 } 322 row = fill[n]; 323 while (row < i) { 324 ajtmp = ajnew + idnew[row] + (!shift); 325 nzbd = 1 + idnew[row] - ainew[row]; 326 nz = im[row] - nzbd; 327 fm = row; 328 while (nz-- > 0) { 329 idx = *ajtmp++ + shift; 330 nzbd++; 331 if (idx == i) im[row] = nzbd; 332 do { 333 m = fm; 334 fm = fill[m]; 335 } while (fm < idx); 336 if (fm != idx) { 337 fill[m] = idx; 338 fill[idx] = fm; 339 fm = idx; 340 nnz++; 341 } 342 } 343 row = fill[row]; 344 } 345 /* copy new filled row into permanent storage */ 346 ainew[i+1] = ainew[i] + nnz; 347 if (ainew[i+1] > jmax) { 348 349 /* estimate how much additional space we will need */ 350 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 351 /* just double the memory each time */ 352 int maxadd = jmax; 353 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 354 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 355 jmax += maxadd; 356 357 /* allocate a longer ajnew */ 358 ajtmp = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(ajtmp); 359 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 360 ierr = PetscFree(ajnew);CHKERRQ(ierr); 361 ajnew = ajtmp; 362 realloc++; /* count how many times we realloc */ 363 } 364 ajtmp = ajnew + ainew[i] + shift; 365 fm = fill[n]; 366 nzi = 0; 367 im[i] = nnz; 368 while (nnz--) { 369 if (fm < i) nzi++; 370 *ajtmp++ = fm - shift; 371 fm = fill[fm]; 372 } 373 idnew[i] = ainew[i] + nzi; 374 } 375 if (ai[n] != 0) { 376 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 377 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 378 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 379 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 380 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 381 } else { 382 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 383 } 384 385 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 386 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 387 388 ierr = PetscFree(fill);CHKERRQ(ierr); 389 390 /* put together the new matrix */ 391 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 392 PLogObjectParent(*B,isicol); 393 b = (Mat_SeqAIJ*)(*B)->data; 394 ierr = PetscFree(b->imax);CHKERRQ(ierr); 395 b->singlemalloc = PETSC_FALSE; 396 /* the next line frees the default space generated by the Create() */ 397 ierr = PetscFree(b->a);CHKERRQ(ierr); 398 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 399 b->a = (Scalar*)PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a); 400 b->j = ajnew; 401 b->i = ainew; 402 b->diag = idnew; 403 b->ilen = 0; 404 b->imax = 0; 405 b->row = isrow; 406 b->col = iscol; 407 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 408 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 409 b->icol = isicol; 410 b->solve_work = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 411 /* In b structure: Free imax, ilen, old a, old j. 412 Allocate idnew, solve_work, new a, new j */ 413 PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 414 b->maxnz = b->nz = ainew[n] + shift; 415 416 (*B)->factor = FACTOR_LU; 417 (*B)->info.factor_mallocs = realloc; 418 (*B)->info.fill_ratio_given = f; 419 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 420 421 if (ai[n] != 0) { 422 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 423 } else { 424 (*B)->info.fill_ratio_needed = 0.0; 425 } 426 PetscFunctionReturn(0); 427 } 428 /* ----------------------------------------------------------- */ 429 extern int Mat_AIJ_CheckInode(Mat); 430 431 #undef __FUNC__ 432 #define __FUNC__ "MatLUFactorNumeric_SeqAIJ" 433 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 434 { 435 Mat C = *B; 436 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 437 IS isrow = b->row,isicol = b->icol; 438 int *r,*ic,ierr,i,j,n = a->m,*ai = b->i,*aj = b->j; 439 int *ajtmpold,*ajtmp,nz,row,*ics,shift = a->indexshift; 440 int *diag_offset = b->diag,diag,k; 441 int preserve_row_sums = (int)a->ilu_preserve_row_sums,*pj; 442 Scalar *rtmp,*v,*pc,multiplier,sum,inner_sum,*rowsums = 0,*pv,*rtmps,*u_values; 443 PetscReal ssum; 444 445 PetscFunctionBegin; 446 447 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 448 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 449 rtmp = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(rtmp); 450 ierr = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr); 451 rtmps = rtmp + shift; ics = ic + shift; 452 453 /* precalculate row sums */ 454 if (preserve_row_sums) { 455 rowsums = (Scalar*)PetscMalloc(n*sizeof(Scalar));CHKPTRQ(rowsums); 456 for (i=0; i<n; i++) { 457 nz = a->i[r[i]+1] - a->i[r[i]]; 458 v = a->a + a->i[r[i]] + shift; 459 sum = 0.0; 460 for (j=0; j<nz; j++) sum += v[j]; 461 rowsums[i] = sum; 462 } 463 } 464 465 for (i=0; i<n; i++) { 466 nz = ai[i+1] - ai[i]; 467 ajtmp = aj + ai[i] + shift; 468 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 469 470 /* load in initial (unfactored row) */ 471 nz = a->i[r[i]+1] - a->i[r[i]]; 472 ajtmpold = a->j + a->i[r[i]] + shift; 473 v = a->a + a->i[r[i]] + shift; 474 for (j=0; j<nz; j++) rtmp[ics[ajtmpold[j]]] = v[j]; 475 476 row = *ajtmp++ + shift; 477 while (row < i) { 478 pc = rtmp + row; 479 if (*pc != 0.0) { 480 pv = b->a + diag_offset[row] + shift; 481 pj = b->j + diag_offset[row] + (!shift); 482 multiplier = *pc / *pv++; 483 *pc = multiplier; 484 nz = ai[row+1] - diag_offset[row] - 1; 485 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 486 PLogFlops(2*nz); 487 } 488 row = *ajtmp++ + shift; 489 } 490 /* finished row so stick it into b->a */ 491 pv = b->a + ai[i] + shift; 492 pj = b->j + ai[i] + shift; 493 nz = ai[i+1] - ai[i]; 494 for (j=0; j<nz; j++) {pv[j] = rtmps[pj[j]];} 495 diag = diag_offset[i] - ai[i]; 496 /* 497 Possibly adjust diagonal entry on current row to force 498 LU matrix to have same row sum as initial matrix. 499 */ 500 if (pv[diag] == 0.0) { 501 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i); 502 } 503 if (preserve_row_sums) { 504 pj = b->j + ai[i] + shift; 505 sum = rowsums[i]; 506 for (j=0; j<diag; j++) { 507 u_values = b->a + diag_offset[pj[j]] + shift; 508 nz = ai[pj[j]+1] - diag_offset[pj[j]]; 509 inner_sum = 0.0; 510 for (k=0; k<nz; k++) { 511 inner_sum += u_values[k]; 512 } 513 sum -= pv[j]*inner_sum; 514 515 } 516 nz = ai[i+1] - diag_offset[i] - 1; 517 u_values = b->a + diag_offset[i] + 1 + shift; 518 for (k=0; k<nz; k++) { 519 sum -= u_values[k]; 520 } 521 ssum = PetscAbsScalar(sum/pv[diag]); 522 if (ssum < 1000. && ssum > .001) pv[diag] = sum; 523 } 524 /* check pivot entry for current row */ 525 } 526 527 /* invert diagonal entries for simplier triangular solves */ 528 for (i=0; i<n; i++) { 529 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 530 } 531 532 if (preserve_row_sums) {ierr = PetscFree(rowsums);CHKERRQ(ierr);} 533 ierr = PetscFree(rtmp);CHKERRQ(ierr); 534 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 535 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 536 C->factor = FACTOR_LU; 537 ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr); 538 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 539 C->assembled = PETSC_TRUE; 540 PLogFlops(b->n); 541 PetscFunctionReturn(0); 542 } 543 /* ----------------------------------------------------------- */ 544 #undef __FUNC__ 545 #define __FUNC__ "MatLUFactor_SeqAIJ" 546 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,PetscReal f) 547 { 548 Mat_SeqAIJ *mat = (Mat_SeqAIJ*)A->data; 549 int ierr,refct; 550 Mat C; 551 PetscOps *Abops; 552 MatOps Aops; 553 554 PetscFunctionBegin; 555 ierr = MatLUFactorSymbolic(A,row,col,f,&C);CHKERRQ(ierr); 556 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 557 558 /* free all the data structures from mat */ 559 ierr = PetscFree(mat->a);CHKERRQ(ierr); 560 if (!mat->singlemalloc) { 561 ierr = PetscFree(mat->i);CHKERRQ(ierr); 562 ierr = PetscFree(mat->j);CHKERRQ(ierr); 563 } 564 if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);} 565 if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);} 566 if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);} 567 if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);} 568 if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);} 569 if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);} 570 ierr = PetscFree(mat);CHKERRQ(ierr); 571 572 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 573 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 574 575 /* 576 This is horrible, horrible code. We need to keep the 577 A pointers for the bops and ops but copy everything 578 else from C. 579 */ 580 Abops = A->bops; 581 Aops = A->ops; 582 refct = A->refct; 583 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 584 mat = (Mat_SeqAIJ*)A->data; 585 PLogObjectParent(A,mat->icol); 586 587 A->bops = Abops; 588 A->ops = Aops; 589 A->qlist = 0; 590 A->refct = refct; 591 /* copy over the type_name and name */ 592 ierr = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr); 593 ierr = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr); 594 595 PetscHeaderDestroy(C); 596 597 PetscFunctionReturn(0); 598 } 599 /* ----------------------------------------------------------- */ 600 #undef __FUNC__ 601 #define __FUNC__ "MatSolve_SeqAIJ" 602 int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 603 { 604 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 605 IS iscol = a->col,isrow = a->row; 606 int *r,*c,ierr,i, n = a->m,*vi,*ai = a->i,*aj = a->j; 607 int nz,shift = a->indexshift,*rout,*cout; 608 Scalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 609 610 PetscFunctionBegin; 611 if (!n) PetscFunctionReturn(0); 612 613 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 614 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 615 tmp = a->solve_work; 616 617 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 618 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 619 620 /* forward solve the lower triangular */ 621 tmp[0] = b[*r++]; 622 tmps = tmp + shift; 623 for (i=1; i<n; i++) { 624 v = aa + ai[i] + shift; 625 vi = aj + ai[i] + shift; 626 nz = a->diag[i] - ai[i]; 627 sum = b[*r++]; 628 while (nz--) sum -= *v++ * tmps[*vi++]; 629 tmp[i] = sum; 630 } 631 632 /* backward solve the upper triangular */ 633 for (i=n-1; i>=0; i--){ 634 v = aa + a->diag[i] + (!shift); 635 vi = aj + a->diag[i] + (!shift); 636 nz = ai[i+1] - a->diag[i] - 1; 637 sum = tmp[i]; 638 while (nz--) sum -= *v++ * tmps[*vi++]; 639 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 640 } 641 642 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 643 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 644 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 645 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 646 PLogFlops(2*a->nz - a->n); 647 PetscFunctionReturn(0); 648 } 649 650 /* ----------------------------------------------------------- */ 651 #undef __FUNC__ 652 #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering" 653 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 654 { 655 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 656 int n = a->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr; 657 Scalar *x,*b,*aa = a->a,sum; 658 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 659 int adiag_i,i,*vi,nz,ai_i; 660 Scalar *v; 661 #endif 662 663 PetscFunctionBegin; 664 if (!n) PetscFunctionReturn(0); 665 if (a->indexshift) { 666 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 667 PetscFunctionReturn(0); 668 } 669 670 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 671 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 672 673 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 674 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 675 #else 676 /* forward solve the lower triangular */ 677 x[0] = b[0]; 678 for (i=1; i<n; i++) { 679 ai_i = ai[i]; 680 v = aa + ai_i; 681 vi = aj + ai_i; 682 nz = adiag[i] - ai_i; 683 sum = b[i]; 684 while (nz--) sum -= *v++ * x[*vi++]; 685 x[i] = sum; 686 } 687 688 /* backward solve the upper triangular */ 689 for (i=n-1; i>=0; i--){ 690 adiag_i = adiag[i]; 691 v = aa + adiag_i + 1; 692 vi = aj + adiag_i + 1; 693 nz = ai[i+1] - adiag_i - 1; 694 sum = x[i]; 695 while (nz--) sum -= *v++ * x[*vi++]; 696 x[i] = sum*aa[adiag_i]; 697 } 698 #endif 699 PLogFlops(2*a->nz - a->n); 700 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 701 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 702 PetscFunctionReturn(0); 703 } 704 705 #undef __FUNC__ 706 #define __FUNC__ "MatSolveAdd_SeqAIJ" 707 int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 708 { 709 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 710 IS iscol = a->col,isrow = a->row; 711 int *r,*c,ierr,i, n = a->m,*vi,*ai = a->i,*aj = a->j; 712 int nz,shift = a->indexshift,*rout,*cout; 713 Scalar *x,*b,*tmp,*aa = a->a,sum,*v; 714 715 PetscFunctionBegin; 716 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 717 718 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 719 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 720 tmp = a->solve_work; 721 722 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 723 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 724 725 /* forward solve the lower triangular */ 726 tmp[0] = b[*r++]; 727 for (i=1; i<n; i++) { 728 v = aa + ai[i] + shift; 729 vi = aj + ai[i] + shift; 730 nz = a->diag[i] - ai[i]; 731 sum = b[*r++]; 732 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 733 tmp[i] = sum; 734 } 735 736 /* backward solve the upper triangular */ 737 for (i=n-1; i>=0; i--){ 738 v = aa + a->diag[i] + (!shift); 739 vi = aj + a->diag[i] + (!shift); 740 nz = ai[i+1] - a->diag[i] - 1; 741 sum = tmp[i]; 742 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 743 tmp[i] = sum*aa[a->diag[i]+shift]; 744 x[*c--] += tmp[i]; 745 } 746 747 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 748 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 749 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 750 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 751 PLogFlops(2*a->nz); 752 753 PetscFunctionReturn(0); 754 } 755 /* -------------------------------------------------------------------*/ 756 #undef __FUNC__ 757 #define __FUNC__ "MatSolveTranspose_SeqAIJ" 758 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 759 { 760 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 761 IS iscol = a->col,isrow = a->row; 762 int *r,*c,ierr,i,n = a->m,*vi,*ai = a->i,*aj = a->j; 763 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 764 Scalar *x,*b,*tmp,*aa = a->a,*v,s1; 765 766 PetscFunctionBegin; 767 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 768 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 769 tmp = a->solve_work; 770 771 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 772 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 773 774 /* copy the b into temp work space according to permutation */ 775 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 776 777 /* forward solve the U^T */ 778 for (i=0; i<n; i++) { 779 v = aa + diag[i] + shift; 780 vi = aj + diag[i] + (!shift); 781 nz = ai[i+1] - diag[i] - 1; 782 s1 = tmp[i]; 783 s1 *= *(v++); /* multiply by inverse of diagonal entry */ 784 while (nz--) { 785 tmp[*vi++ + shift] -= (*v++)*s1; 786 } 787 tmp[i] = s1; 788 } 789 790 /* backward solve the L^T */ 791 for (i=n-1; i>=0; i--){ 792 v = aa + diag[i] - 1 + shift; 793 vi = aj + diag[i] - 1 + shift; 794 nz = diag[i] - ai[i]; 795 s1 = tmp[i]; 796 while (nz--) { 797 tmp[*vi-- + shift] -= (*v--)*s1; 798 } 799 } 800 801 /* copy tmp into x according to permutation */ 802 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 803 804 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 805 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 806 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 807 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 808 809 PLogFlops(2*a->nz-a->n); 810 PetscFunctionReturn(0); 811 } 812 813 #undef __FUNC__ 814 #define __FUNC__ "MatSolveTransposeAdd_SeqAIJ" 815 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 816 { 817 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 818 IS iscol = a->col,isrow = a->row; 819 int *r,*c,ierr,i,n = a->m,*vi,*ai = a->i,*aj = a->j; 820 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 821 Scalar *x,*b,*tmp,*aa = a->a,*v; 822 823 PetscFunctionBegin; 824 if (zz != xx) VecCopy(zz,xx); 825 826 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 827 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 828 tmp = a->solve_work; 829 830 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 831 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 832 833 /* copy the b into temp work space according to permutation */ 834 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 835 836 /* forward solve the U^T */ 837 for (i=0; i<n; i++) { 838 v = aa + diag[i] + shift; 839 vi = aj + diag[i] + (!shift); 840 nz = ai[i+1] - diag[i] - 1; 841 tmp[i] *= *v++; 842 while (nz--) { 843 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 844 } 845 } 846 847 /* backward solve the L^T */ 848 for (i=n-1; i>=0; i--){ 849 v = aa + diag[i] - 1 + shift; 850 vi = aj + diag[i] - 1 + shift; 851 nz = diag[i] - ai[i]; 852 while (nz--) { 853 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 854 } 855 } 856 857 /* copy tmp into x according to permutation */ 858 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 859 860 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 861 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 862 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 863 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 864 865 PLogFlops(2*a->nz); 866 PetscFunctionReturn(0); 867 } 868 /* ----------------------------------------------------------------*/ 869 extern int MatMissingDiagonal_SeqAIJ(Mat); 870 871 #undef __FUNC__ 872 #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ" 873 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 874 { 875 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 876 IS isicol; 877 int *r,*ic,ierr,prow,n = a->m,*ai = a->i,*aj = a->j; 878 int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 879 int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0; 880 int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 881 PetscTruth col_identity,row_identity; 882 PetscReal f; 883 884 PetscFunctionBegin; 885 if (info) { 886 f = info->fill; 887 levels = (int)info->levels; 888 diagonal_fill = (int)info->diagonal_fill; 889 } else { 890 f = 1.0; 891 levels = 0; 892 diagonal_fill = 0; 893 } 894 if (!isrow) { 895 ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);CHKERRQ(ierr); 896 } 897 if (!iscol) { 898 ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);CHKERRQ(ierr); 899 } 900 901 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 902 903 /* special case that simply copies fill pattern */ 904 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 905 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 906 if (!levels && row_identity && col_identity) { 907 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 908 (*fact)->factor = FACTOR_LU; 909 b = (Mat_SeqAIJ*)(*fact)->data; 910 if (!b->diag) { 911 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 912 } 913 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 914 b->row = isrow; 915 b->col = iscol; 916 b->icol = isicol; 917 b->solve_work = (Scalar*)PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 918 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 919 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 920 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 921 PetscFunctionReturn(0); 922 } 923 924 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 925 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 926 927 /* get new row pointers */ 928 ainew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(ainew); 929 ainew[0] = -shift; 930 /* don't know how many column pointers are needed so estimate */ 931 jmax = (int)(f*(ai[n]+!shift)); 932 ajnew = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajnew); 933 /* ajfill is level of fill for each fill entry */ 934 ajfill = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajfill); 935 /* fill is a linked list of nonzeros in active row */ 936 fill = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(fill); 937 /* im is level for each filled value */ 938 im = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(im); 939 /* dloc is location of diagonal in factor */ 940 dloc = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(dloc); 941 dloc[0] = 0; 942 for (prow=0; prow<n; prow++) { 943 944 /* copy current row into linked list */ 945 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 946 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 947 xi = aj + ai[r[prow]] + shift; 948 fill[n] = n; 949 fill[prow] = -1; /* marker to indicate if diagonal exists */ 950 while (nz--) { 951 fm = n; 952 idx = ic[*xi++ + shift]; 953 do { 954 m = fm; 955 fm = fill[m]; 956 } while (fm < idx); 957 fill[m] = idx; 958 fill[idx] = fm; 959 im[idx] = 0; 960 } 961 962 /* make sure diagonal entry is included */ 963 if (diagonal_fill && fill[prow] == -1) { 964 fm = n; 965 while (fill[fm] < prow) fm = fill[fm]; 966 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 967 fill[fm] = prow; 968 im[prow] = 0; 969 nzf++; 970 dcount++; 971 } 972 973 nzi = 0; 974 row = fill[n]; 975 while (row < prow) { 976 incrlev = im[row] + 1; 977 nz = dloc[row]; 978 xi = ajnew + ainew[row] + shift + nz + 1; 979 flev = ajfill + ainew[row] + shift + nz + 1; 980 nnz = ainew[row+1] - ainew[row] - nz - 1; 981 fm = row; 982 while (nnz-- > 0) { 983 idx = *xi++ + shift; 984 if (*flev + incrlev > levels) { 985 flev++; 986 continue; 987 } 988 do { 989 m = fm; 990 fm = fill[m]; 991 } while (fm < idx); 992 if (fm != idx) { 993 im[idx] = *flev + incrlev; 994 fill[m] = idx; 995 fill[idx] = fm; 996 fm = idx; 997 nzf++; 998 } else { 999 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 1000 } 1001 flev++; 1002 } 1003 row = fill[row]; 1004 nzi++; 1005 } 1006 /* copy new filled row into permanent storage */ 1007 ainew[prow+1] = ainew[prow] + nzf; 1008 if (ainew[prow+1] > jmax-shift) { 1009 1010 /* estimate how much additional space we will need */ 1011 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1012 /* just double the memory each time */ 1013 /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1014 int maxadd = jmax; 1015 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1016 jmax += maxadd; 1017 1018 /* allocate a longer ajnew and ajfill */ 1019 xi = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(xi); 1020 ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1021 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1022 ajnew = xi; 1023 xi = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(xi); 1024 ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1025 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1026 ajfill = xi; 1027 realloc++; /* count how many times we realloc */ 1028 } 1029 xi = ajnew + ainew[prow] + shift; 1030 flev = ajfill + ainew[prow] + shift; 1031 dloc[prow] = nzi; 1032 fm = fill[n]; 1033 while (nzf--) { 1034 *xi++ = fm - shift; 1035 *flev++ = im[fm]; 1036 fm = fill[fm]; 1037 } 1038 /* make sure row has diagonal entry */ 1039 if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 1040 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 1041 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1042 } 1043 } 1044 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1045 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1046 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1047 ierr = PetscFree(fill);CHKERRQ(ierr); 1048 ierr = PetscFree(im);CHKERRQ(ierr); 1049 1050 { 1051 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1052 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1053 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 1054 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 1055 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1056 if (diagonal_fill) { 1057 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount); 1058 } 1059 } 1060 1061 /* put together the new matrix */ 1062 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1063 PLogObjectParent(*fact,isicol); 1064 b = (Mat_SeqAIJ*)(*fact)->data; 1065 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1066 b->singlemalloc = PETSC_FALSE; 1067 len = (ainew[n] + shift)*sizeof(Scalar); 1068 /* the next line frees the default space generated by the Create() */ 1069 ierr = PetscFree(b->a);CHKERRQ(ierr); 1070 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1071 b->a = (Scalar*)PetscMalloc(len+1);CHKPTRQ(b->a); 1072 b->j = ajnew; 1073 b->i = ainew; 1074 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1075 b->diag = dloc; 1076 b->ilen = 0; 1077 b->imax = 0; 1078 b->row = isrow; 1079 b->col = iscol; 1080 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1081 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1082 b->icol = isicol; 1083 b->solve_work = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1084 /* In b structure: Free imax, ilen, old a, old j. 1085 Allocate dloc, solve_work, new a, new j */ 1086 PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 1087 b->maxnz = b->nz = ainew[n] + shift; 1088 (*fact)->factor = FACTOR_LU; 1089 1090 (*fact)->info.factor_mallocs = realloc; 1091 (*fact)->info.fill_ratio_given = f; 1092 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1093 (*fact)->factor = FACTOR_LU; 1094 1095 PetscFunctionReturn(0); 1096 } 1097 1098 1099 1100 1101