1 /*$Id: aijfact.c,v 1.150 2000/05/10 16:40:36 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 7 #undef __FUNC__ 8 #define __FUNC__ /*<a name=""></a>*/"MatOrdering_Flow_SeqAIJ" 9 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 10 { 11 PetscFunctionBegin; 12 13 SETERRQ(PETSC_ERR_SUP,0,"Code not written"); 14 #if !defined(PETSC_USE_DEBUG) 15 PetscFunctionReturn(0); 16 #endif 17 } 18 19 20 EXTERN int MatMarkDiagonal_SeqAIJ(Mat); 21 EXTERN int Mat_AIJ_CheckInode(Mat); 22 23 EXTERN int SPARSEKIT2dperm(int*,Scalar*,int*,int*,Scalar*,int*,int*,int*,int*,int*); 24 EXTERN int SPARSEKIT2ilutp(int*,Scalar*,int*,int*,int*,PetscReal*,PetscReal*,int*,Scalar*,int*,int*,int*,Scalar*,int*,int*,int*); 25 EXTERN int SPARSEKIT2msrcsr(int*,Scalar*,int*,Scalar*,int*,int*,Scalar*,int*); 26 27 #undef __FUNC__ 28 #define __FUNC__ /*<a name=""></a>*/"MatILUDTFactor_SeqAIJ" 29 /* ------------------------------------------------------------ 30 31 This interface was contribed by Tony Caola 32 33 This routine is an interface to the pivoting drop-tolerance 34 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 35 SPARSEKIT2. 36 37 The SPARSEKIT2 routines used here are covered by the GNU 38 copyright; see the file gnu in this directory. 39 40 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 41 help in getting this routine ironed out. 42 43 The major drawback to this routine is that if info->fill is 44 not large enough it fails rather than allocating more space; 45 this can be fixed by hacking/improving the f2c version of 46 Yousef Saad's code. 47 48 ishift = 0, for indices start at 1 49 ishift = 1, for indices starting at 0 50 ------------------------------------------------------------ 51 */ 52 53 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 54 { 55 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 56 IS iscolf,isicol,isirow; 57 PetscTruth reorder; 58 int *c,*r,*ic,ierr,i,n = a->m; 59 int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 60 int *ordcol,*iwk,*iperm,*jw; 61 int ishift = !a->indexshift; 62 int jmax,lfill,job,*o_i,*o_j; 63 Scalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 64 PetscReal permtol,af; 65 66 PetscFunctionBegin; 67 68 if (info->dt == PETSC_DEFAULT) info->dt = .005; 69 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax); 70 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 71 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*info->dtcount))/a->nz; 72 lfill = (int)(info->dtcount/2.0); 73 jmax = (int)(info->fill*a->nz); 74 permtol = info->dtcol; 75 76 77 /* ------------------------------------------------------------ 78 If reorder=.TRUE., then the original matrix has to be 79 reordered to reflect the user selected ordering scheme, and 80 then de-reordered so it is in it's original format. 81 Because Saad's dperm() is NOT in place, we have to copy 82 the original matrix and allocate more storage. . . 83 ------------------------------------------------------------ 84 */ 85 86 /* set reorder to true if either isrow or iscol is not identity */ 87 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 88 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 89 reorder = PetscNot(reorder); 90 91 92 /* storage for ilu factor */ 93 new_i = (int*) PetscMalloc((n+1)*sizeof(int));CHKPTRQ(new_i); 94 new_j = (int*) PetscMalloc(jmax*sizeof(int));CHKPTRQ(new_j); 95 new_a = (Scalar*)PetscMalloc(jmax*sizeof(Scalar));CHKPTRQ(new_a); 96 97 ordcol = (int*)PetscMalloc(n*sizeof(int));CHKPTRQ(ordcol); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 if (ishift) { 104 for (i=old_i[0];i<old_i[n];i++) { 105 old_j[i]++; 106 } 107 for(i=0;i<n+1;i++) { 108 old_i[i]++; 109 }; 110 }; 111 112 if (reorder) { 113 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 114 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 115 for(i=0;i<n;i++) { 116 r[i] = r[i]+1; 117 c[i] = c[i]+1; 118 } 119 old_i2 = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(old_i2); 120 old_j2 = (int*)PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int));CHKPTRQ(old_j2); 121 old_a2 = (Scalar*)PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar));CHKPTRQ(old_a2); 122 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 123 for (i=0;i<n;i++) { 124 r[i] = r[i]-1; 125 c[i] = c[i]-1; 126 } 127 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 128 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 iperm = (int*) PetscMalloc(2*n*sizeof(int));CHKPTRQ(iperm); 144 jw = (int*) PetscMalloc(2*n*sizeof(int));CHKPTRQ(jw); 145 w = (Scalar*)PetscMalloc(n*sizeof(Scalar));CHKPTRQ(w); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 148 if (ierr) { 149 switch (ierr) { 150 case -3: SETERRQ2(1,1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 151 case -2: SETERRQ2(1,1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 152 case -5: SETERRQ(1,1,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(1,1,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(1,1,"ilutp(), illegal info->fill value %d",jmax); 155 default: SETERRQ1(1,1,"ilutp(), zero pivot detected on row %d",ierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 wk = (Scalar*)PetscMalloc(n*sizeof(Scalar));CHKPTRQ(wk); 169 iwk = (int*) PetscMalloc((n+1)*sizeof(int));CHKPTRQ(iwk); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 if (ishift) { 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 } 196 197 /* Make the factored matrix 0-based */ 198 if (ishift) { 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 } 206 207 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 208 /*-- permute the right-hand-side and solution vectors --*/ 209 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 210 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 211 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 212 for(i=0; i<n; i++) { 213 ordcol[i] = ic[iperm[i]-1]; 214 }; 215 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 216 ierr = ISDestroy(isicol);CHKERRQ(ierr); 217 218 ierr = PetscFree(iperm);CHKERRQ(ierr); 219 220 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 221 ierr = PetscFree(ordcol);CHKERRQ(ierr); 222 223 /*----- put together the new matrix -----*/ 224 225 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 ierr = PetscFree(b->imax);CHKERRQ(ierr); 231 b->sorted = PETSC_FALSE; 232 b->singlemalloc = PETSC_FALSE; 233 /* the next line frees the default space generated by the MatCreate() */ 234 ierr = PetscFree(b->a);CHKERRQ(ierr); 235 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 236 b->a = new_a; 237 b->j = new_j; 238 b->i = new_i; 239 b->ilen = 0; 240 b->imax = 0; 241 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 242 b->row = isirow; 243 b->col = iscolf; 244 b->solve_work = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 245 b->maxnz = b->nz = new_i[n]; 246 b->indexshift = a->indexshift; 247 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 248 (*fact)->info.factor_mallocs = 0; 249 250 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 251 252 /* check out for identical nodes. If found, use inode functions */ 253 ierr = Mat_AIJ_CheckInode(*fact);CHKERRQ(ierr); 254 255 af = ((double)b->nz)/((double)a->nz) + .001; 256 PLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 257 PLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 258 PLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 259 PLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 260 261 PetscFunctionReturn(0); 262 } 263 264 /* 265 Factorization code for AIJ format. 266 */ 267 #undef __FUNC__ 268 #define __FUNC__ /*<a name="MatLUFactorSymbolic_SeqAIJ""></a>*/"MatLUFactorSymbolic_SeqAIJ" 269 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatLUInfo *info,Mat *B) 270 { 271 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 272 IS isicol; 273 int *r,*ic,ierr,i,n = a->m,*ai = a->i,*aj = a->j; 274 int *ainew,*ajnew,jmax,*fill,*ajtmp,nz,shift = a->indexshift; 275 int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 276 PetscReal f; 277 278 PetscFunctionBegin; 279 PetscValidHeaderSpecific(isrow,IS_COOKIE); 280 PetscValidHeaderSpecific(iscol,IS_COOKIE); 281 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square"); 282 283 if (!isrow) { 284 ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);CHKERRQ(ierr); 285 } 286 if (!iscol) { 287 ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);CHKERRQ(ierr); 288 } 289 290 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 291 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 292 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 293 294 /* get new row pointers */ 295 ainew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(ainew); 296 ainew[0] = -shift; 297 /* don't know how many column pointers are needed so estimate */ 298 if (info) f = info->fill; else f = 1.0; 299 jmax = (int)(f*ai[n]+(!shift)); 300 ajnew = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajnew); 301 /* fill is a linked list of nonzeros in active row */ 302 fill = (int*)PetscMalloc((2*n+1)*sizeof(int));CHKPTRQ(fill); 303 im = fill + n + 1; 304 /* idnew is location of diagonal in factor */ 305 idnew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(idnew); 306 idnew[0] = -shift; 307 308 for (i=0; i<n; i++) { 309 /* first copy previous fill into linked list */ 310 nnz = nz = ai[r[i]+1] - ai[r[i]]; 311 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 312 ajtmp = aj + ai[r[i]] + shift; 313 fill[n] = n; 314 while (nz--) { 315 fm = n; 316 idx = ic[*ajtmp++ + shift]; 317 do { 318 m = fm; 319 fm = fill[m]; 320 } while (fm < idx); 321 fill[m] = idx; 322 fill[idx] = fm; 323 } 324 row = fill[n]; 325 while (row < i) { 326 ajtmp = ajnew + idnew[row] + (!shift); 327 nzbd = 1 + idnew[row] - ainew[row]; 328 nz = im[row] - nzbd; 329 fm = row; 330 while (nz-- > 0) { 331 idx = *ajtmp++ + shift; 332 nzbd++; 333 if (idx == i) im[row] = nzbd; 334 do { 335 m = fm; 336 fm = fill[m]; 337 } while (fm < idx); 338 if (fm != idx) { 339 fill[m] = idx; 340 fill[idx] = fm; 341 fm = idx; 342 nnz++; 343 } 344 } 345 row = fill[row]; 346 } 347 /* copy new filled row into permanent storage */ 348 ainew[i+1] = ainew[i] + nnz; 349 if (ainew[i+1] > jmax) { 350 351 /* estimate how much additional space we will need */ 352 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 353 /* just double the memory each time */ 354 int maxadd = jmax; 355 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 356 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 357 jmax += maxadd; 358 359 /* allocate a longer ajnew */ 360 ajtmp = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(ajtmp); 361 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 362 ierr = PetscFree(ajnew);CHKERRQ(ierr); 363 ajnew = ajtmp; 364 realloc++; /* count how many times we realloc */ 365 } 366 ajtmp = ajnew + ainew[i] + shift; 367 fm = fill[n]; 368 nzi = 0; 369 im[i] = nnz; 370 while (nnz--) { 371 if (fm < i) nzi++; 372 *ajtmp++ = fm - shift; 373 fm = fill[fm]; 374 } 375 idnew[i] = ainew[i] + nzi; 376 } 377 if (ai[n] != 0) { 378 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 379 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 380 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 381 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 382 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 383 } else { 384 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 385 } 386 387 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 388 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 389 390 ierr = PetscFree(fill);CHKERRQ(ierr); 391 392 /* put together the new matrix */ 393 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 394 PLogObjectParent(*B,isicol); 395 b = (Mat_SeqAIJ*)(*B)->data; 396 ierr = PetscFree(b->imax);CHKERRQ(ierr); 397 b->singlemalloc = PETSC_FALSE; 398 /* the next line frees the default space generated by the Create() */ 399 ierr = PetscFree(b->a);CHKERRQ(ierr); 400 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 401 b->a = (Scalar*)PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a); 402 b->j = ajnew; 403 b->i = ainew; 404 b->diag = idnew; 405 b->ilen = 0; 406 b->imax = 0; 407 b->row = isrow; 408 b->col = iscol; 409 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 410 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 411 b->icol = isicol; 412 b->solve_work = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 413 /* In b structure: Free imax, ilen, old a, old j. 414 Allocate idnew, solve_work, new a, new j */ 415 PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 416 b->maxnz = b->nz = ainew[n] + shift; 417 418 (*B)->factor = FACTOR_LU; 419 (*B)->info.factor_mallocs = realloc; 420 (*B)->info.fill_ratio_given = f; 421 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 422 423 if (ai[n] != 0) { 424 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 425 } else { 426 (*B)->info.fill_ratio_needed = 0.0; 427 } 428 PetscFunctionReturn(0); 429 } 430 /* ----------------------------------------------------------- */ 431 EXTERN int Mat_AIJ_CheckInode(Mat); 432 433 #undef __FUNC__ 434 #define __FUNC__ /*<a name="MatLUFactorNumeric_SeqAIJ"></a>*/"MatLUFactorNumeric_SeqAIJ" 435 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 436 { 437 Mat C = *B; 438 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 439 IS isrow = b->row,isicol = b->icol; 440 int *r,*ic,ierr,i,j,n = a->m,*ai = b->i,*aj = b->j; 441 int *ajtmpold,*ajtmp,nz,row,*ics,shift = a->indexshift; 442 int *diag_offset = b->diag,diag,k; 443 int preserve_row_sums = (int)a->ilu_preserve_row_sums,*pj; 444 Scalar *rtmp,*v,*pc,multiplier,sum,inner_sum,*rowsums = 0,*pv,*rtmps,*u_values; 445 PetscReal ssum; 446 447 PetscFunctionBegin; 448 449 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 450 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 451 rtmp = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(rtmp); 452 ierr = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr); 453 rtmps = rtmp + shift; ics = ic + shift; 454 455 /* precalculate row sums */ 456 if (preserve_row_sums) { 457 rowsums = (Scalar*)PetscMalloc(n*sizeof(Scalar));CHKPTRQ(rowsums); 458 for (i=0; i<n; i++) { 459 nz = a->i[r[i]+1] - a->i[r[i]]; 460 v = a->a + a->i[r[i]] + shift; 461 sum = 0.0; 462 for (j=0; j<nz; j++) sum += v[j]; 463 rowsums[i] = sum; 464 } 465 } 466 467 for (i=0; i<n; i++) { 468 nz = ai[i+1] - ai[i]; 469 ajtmp = aj + ai[i] + shift; 470 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 471 472 /* load in initial (unfactored row) */ 473 nz = a->i[r[i]+1] - a->i[r[i]]; 474 ajtmpold = a->j + a->i[r[i]] + shift; 475 v = a->a + a->i[r[i]] + shift; 476 for (j=0; j<nz; j++) rtmp[ics[ajtmpold[j]]] = v[j]; 477 478 row = *ajtmp++ + shift; 479 while (row < i) { 480 pc = rtmp + row; 481 if (*pc != 0.0) { 482 pv = b->a + diag_offset[row] + shift; 483 pj = b->j + diag_offset[row] + (!shift); 484 multiplier = *pc / *pv++; 485 *pc = multiplier; 486 nz = ai[row+1] - diag_offset[row] - 1; 487 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 488 PLogFlops(2*nz); 489 } 490 row = *ajtmp++ + shift; 491 } 492 /* finished row so stick it into b->a */ 493 pv = b->a + ai[i] + shift; 494 pj = b->j + ai[i] + shift; 495 nz = ai[i+1] - ai[i]; 496 for (j=0; j<nz; j++) {pv[j] = rtmps[pj[j]];} 497 diag = diag_offset[i] - ai[i]; 498 /* 499 Possibly adjust diagonal entry on current row to force 500 LU matrix to have same row sum as initial matrix. 501 */ 502 if (pv[diag] == 0.0) { 503 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i); 504 } 505 if (preserve_row_sums) { 506 pj = b->j + ai[i] + shift; 507 sum = rowsums[i]; 508 for (j=0; j<diag; j++) { 509 u_values = b->a + diag_offset[pj[j]] + shift; 510 nz = ai[pj[j]+1] - diag_offset[pj[j]]; 511 inner_sum = 0.0; 512 for (k=0; k<nz; k++) { 513 inner_sum += u_values[k]; 514 } 515 sum -= pv[j]*inner_sum; 516 517 } 518 nz = ai[i+1] - diag_offset[i] - 1; 519 u_values = b->a + diag_offset[i] + 1 + shift; 520 for (k=0; k<nz; k++) { 521 sum -= u_values[k]; 522 } 523 ssum = PetscAbsScalar(sum/pv[diag]); 524 if (ssum < 1000. && ssum > .001) pv[diag] = sum; 525 } 526 /* check pivot entry for current row */ 527 } 528 529 /* invert diagonal entries for simplier triangular solves */ 530 for (i=0; i<n; i++) { 531 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 532 } 533 534 if (preserve_row_sums) {ierr = PetscFree(rowsums);CHKERRQ(ierr);} 535 ierr = PetscFree(rtmp);CHKERRQ(ierr); 536 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 537 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 538 C->factor = FACTOR_LU; 539 ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr); 540 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 541 C->assembled = PETSC_TRUE; 542 PLogFlops(b->n); 543 PetscFunctionReturn(0); 544 } 545 /* ----------------------------------------------------------- */ 546 #undef __FUNC__ 547 #define __FUNC__ /*<a name="MatLUFactor_SeqAIJ"></a>*/"MatLUFactor_SeqAIJ" 548 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatLUInfo *info) 549 { 550 Mat_SeqAIJ *mat = (Mat_SeqAIJ*)A->data; 551 int ierr,refct; 552 Mat C; 553 PetscOps *Abops; 554 MatOps Aops; 555 556 PetscFunctionBegin; 557 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 558 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 559 560 /* free all the data structures from mat */ 561 ierr = PetscFree(mat->a);CHKERRQ(ierr); 562 if (!mat->singlemalloc) { 563 ierr = PetscFree(mat->i);CHKERRQ(ierr); 564 ierr = PetscFree(mat->j);CHKERRQ(ierr); 565 } 566 if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);} 567 if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);} 568 if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);} 569 if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);} 570 if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);} 571 if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);} 572 ierr = PetscFree(mat);CHKERRQ(ierr); 573 574 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 575 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 576 577 /* 578 This is horrible, horrible code. We need to keep the 579 A pointers for the bops and ops but copy everything 580 else from C. 581 */ 582 Abops = A->bops; 583 Aops = A->ops; 584 refct = A->refct; 585 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 586 mat = (Mat_SeqAIJ*)A->data; 587 PLogObjectParent(A,mat->icol); 588 589 A->bops = Abops; 590 A->ops = Aops; 591 A->qlist = 0; 592 A->refct = refct; 593 /* copy over the type_name and name */ 594 ierr = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr); 595 ierr = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr); 596 597 PetscHeaderDestroy(C); 598 599 PetscFunctionReturn(0); 600 } 601 /* ----------------------------------------------------------- */ 602 #undef __FUNC__ 603 #define __FUNC__ /*<a name=""></a>*/"MatSolve_SeqAIJ" 604 int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 605 { 606 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 607 IS iscol = a->col,isrow = a->row; 608 int *r,*c,ierr,i, n = a->m,*vi,*ai = a->i,*aj = a->j; 609 int nz,shift = a->indexshift,*rout,*cout; 610 Scalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 611 612 PetscFunctionBegin; 613 if (!n) PetscFunctionReturn(0); 614 615 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 616 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 617 tmp = a->solve_work; 618 619 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 620 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 621 622 /* forward solve the lower triangular */ 623 tmp[0] = b[*r++]; 624 tmps = tmp + shift; 625 for (i=1; i<n; i++) { 626 v = aa + ai[i] + shift; 627 vi = aj + ai[i] + shift; 628 nz = a->diag[i] - ai[i]; 629 sum = b[*r++]; 630 while (nz--) sum -= *v++ * tmps[*vi++]; 631 tmp[i] = sum; 632 } 633 634 /* backward solve the upper triangular */ 635 for (i=n-1; i>=0; i--){ 636 v = aa + a->diag[i] + (!shift); 637 vi = aj + a->diag[i] + (!shift); 638 nz = ai[i+1] - a->diag[i] - 1; 639 sum = tmp[i]; 640 while (nz--) sum -= *v++ * tmps[*vi++]; 641 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 642 } 643 644 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 645 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 646 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 647 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 648 PLogFlops(2*a->nz - a->n); 649 PetscFunctionReturn(0); 650 } 651 652 /* ----------------------------------------------------------- */ 653 #undef __FUNC__ 654 #define __FUNC__ /*<a name=""></a>*/"MatSolve_SeqAIJ_NaturalOrdering" 655 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 656 { 657 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 658 int n = a->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr; 659 Scalar *x,*b,*aa = a->a,sum; 660 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 661 int adiag_i,i,*vi,nz,ai_i; 662 Scalar *v; 663 #endif 664 665 PetscFunctionBegin; 666 if (!n) PetscFunctionReturn(0); 667 if (a->indexshift) { 668 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 669 PetscFunctionReturn(0); 670 } 671 672 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 673 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 674 675 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 676 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 677 #else 678 /* forward solve the lower triangular */ 679 x[0] = b[0]; 680 for (i=1; i<n; i++) { 681 ai_i = ai[i]; 682 v = aa + ai_i; 683 vi = aj + ai_i; 684 nz = adiag[i] - ai_i; 685 sum = b[i]; 686 while (nz--) sum -= *v++ * x[*vi++]; 687 x[i] = sum; 688 } 689 690 /* backward solve the upper triangular */ 691 for (i=n-1; i>=0; i--){ 692 adiag_i = adiag[i]; 693 v = aa + adiag_i + 1; 694 vi = aj + adiag_i + 1; 695 nz = ai[i+1] - adiag_i - 1; 696 sum = x[i]; 697 while (nz--) sum -= *v++ * x[*vi++]; 698 x[i] = sum*aa[adiag_i]; 699 } 700 #endif 701 PLogFlops(2*a->nz - a->n); 702 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 703 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNC__ 708 #define __FUNC__ /*<a name=""></a>*/"MatSolveAdd_SeqAIJ" 709 int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 710 { 711 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 712 IS iscol = a->col,isrow = a->row; 713 int *r,*c,ierr,i, n = a->m,*vi,*ai = a->i,*aj = a->j; 714 int nz,shift = a->indexshift,*rout,*cout; 715 Scalar *x,*b,*tmp,*aa = a->a,sum,*v; 716 717 PetscFunctionBegin; 718 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 719 720 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 721 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 722 tmp = a->solve_work; 723 724 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 725 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 726 727 /* forward solve the lower triangular */ 728 tmp[0] = b[*r++]; 729 for (i=1; i<n; i++) { 730 v = aa + ai[i] + shift; 731 vi = aj + ai[i] + shift; 732 nz = a->diag[i] - ai[i]; 733 sum = b[*r++]; 734 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 735 tmp[i] = sum; 736 } 737 738 /* backward solve the upper triangular */ 739 for (i=n-1; i>=0; i--){ 740 v = aa + a->diag[i] + (!shift); 741 vi = aj + a->diag[i] + (!shift); 742 nz = ai[i+1] - a->diag[i] - 1; 743 sum = tmp[i]; 744 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 745 tmp[i] = sum*aa[a->diag[i]+shift]; 746 x[*c--] += tmp[i]; 747 } 748 749 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 750 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 751 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 752 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 753 PLogFlops(2*a->nz); 754 755 PetscFunctionReturn(0); 756 } 757 /* -------------------------------------------------------------------*/ 758 #undef __FUNC__ 759 #define __FUNC__ /*<a name=""></a>*/"MatSolveTranspose_SeqAIJ" 760 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 761 { 762 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 763 IS iscol = a->col,isrow = a->row; 764 int *r,*c,ierr,i,n = a->m,*vi,*ai = a->i,*aj = a->j; 765 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 766 Scalar *x,*b,*tmp,*aa = a->a,*v,s1; 767 768 PetscFunctionBegin; 769 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 770 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 771 tmp = a->solve_work; 772 773 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 774 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 775 776 /* copy the b into temp work space according to permutation */ 777 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 778 779 /* forward solve the U^T */ 780 for (i=0; i<n; i++) { 781 v = aa + diag[i] + shift; 782 vi = aj + diag[i] + (!shift); 783 nz = ai[i+1] - diag[i] - 1; 784 s1 = tmp[i]; 785 s1 *= *(v++); /* multiply by inverse of diagonal entry */ 786 while (nz--) { 787 tmp[*vi++ + shift] -= (*v++)*s1; 788 } 789 tmp[i] = s1; 790 } 791 792 /* backward solve the L^T */ 793 for (i=n-1; i>=0; i--){ 794 v = aa + diag[i] - 1 + shift; 795 vi = aj + diag[i] - 1 + shift; 796 nz = diag[i] - ai[i]; 797 s1 = tmp[i]; 798 while (nz--) { 799 tmp[*vi-- + shift] -= (*v--)*s1; 800 } 801 } 802 803 /* copy tmp into x according to permutation */ 804 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 805 806 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 807 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 808 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 809 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 810 811 PLogFlops(2*a->nz-a->n); 812 PetscFunctionReturn(0); 813 } 814 815 #undef __FUNC__ 816 #define __FUNC__ /*<a name=""></a>*/"MatSolveTransposeAdd_SeqAIJ" 817 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 818 { 819 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 820 IS iscol = a->col,isrow = a->row; 821 int *r,*c,ierr,i,n = a->m,*vi,*ai = a->i,*aj = a->j; 822 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 823 Scalar *x,*b,*tmp,*aa = a->a,*v; 824 825 PetscFunctionBegin; 826 if (zz != xx) VecCopy(zz,xx); 827 828 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 829 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 830 tmp = a->solve_work; 831 832 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 833 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 834 835 /* copy the b into temp work space according to permutation */ 836 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 837 838 /* forward solve the U^T */ 839 for (i=0; i<n; i++) { 840 v = aa + diag[i] + shift; 841 vi = aj + diag[i] + (!shift); 842 nz = ai[i+1] - diag[i] - 1; 843 tmp[i] *= *v++; 844 while (nz--) { 845 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 846 } 847 } 848 849 /* backward solve the L^T */ 850 for (i=n-1; i>=0; i--){ 851 v = aa + diag[i] - 1 + shift; 852 vi = aj + diag[i] - 1 + shift; 853 nz = diag[i] - ai[i]; 854 while (nz--) { 855 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 856 } 857 } 858 859 /* copy tmp into x according to permutation */ 860 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 861 862 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 863 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 864 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 865 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 866 867 PLogFlops(2*a->nz); 868 PetscFunctionReturn(0); 869 } 870 /* ----------------------------------------------------------------*/ 871 EXTERN int MatMissingDiagonal_SeqAIJ(Mat); 872 873 #undef __FUNC__ 874 #define __FUNC__ /*<a name=""></a>*/"MatILUFactorSymbolic_SeqAIJ" 875 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 876 { 877 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 878 IS isicol; 879 int *r,*ic,ierr,prow,n = a->m,*ai = a->i,*aj = a->j; 880 int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 881 int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0; 882 int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 883 PetscTruth col_identity,row_identity; 884 PetscReal f; 885 886 PetscFunctionBegin; 887 if (info) { 888 f = info->fill; 889 levels = (int)info->levels; 890 diagonal_fill = (int)info->diagonal_fill; 891 } else { 892 f = 1.0; 893 levels = 0; 894 diagonal_fill = 0; 895 } 896 if (!isrow) { 897 ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);CHKERRQ(ierr); 898 } 899 if (!iscol) { 900 ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);CHKERRQ(ierr); 901 } 902 903 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 904 905 /* special case that simply copies fill pattern */ 906 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 907 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 908 if (!levels && row_identity && col_identity) { 909 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 910 (*fact)->factor = FACTOR_LU; 911 b = (Mat_SeqAIJ*)(*fact)->data; 912 if (!b->diag) { 913 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 914 } 915 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 916 b->row = isrow; 917 b->col = iscol; 918 b->icol = isicol; 919 b->solve_work = (Scalar*)PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 920 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 921 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 922 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 923 PetscFunctionReturn(0); 924 } 925 926 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 927 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 928 929 /* get new row pointers */ 930 ainew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(ainew); 931 ainew[0] = -shift; 932 /* don't know how many column pointers are needed so estimate */ 933 jmax = (int)(f*(ai[n]+!shift)); 934 ajnew = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajnew); 935 /* ajfill is level of fill for each fill entry */ 936 ajfill = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajfill); 937 /* fill is a linked list of nonzeros in active row */ 938 fill = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(fill); 939 /* im is level for each filled value */ 940 im = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(im); 941 /* dloc is location of diagonal in factor */ 942 dloc = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(dloc); 943 dloc[0] = 0; 944 for (prow=0; prow<n; prow++) { 945 946 /* copy current row into linked list */ 947 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 948 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 949 xi = aj + ai[r[prow]] + shift; 950 fill[n] = n; 951 fill[prow] = -1; /* marker to indicate if diagonal exists */ 952 while (nz--) { 953 fm = n; 954 idx = ic[*xi++ + shift]; 955 do { 956 m = fm; 957 fm = fill[m]; 958 } while (fm < idx); 959 fill[m] = idx; 960 fill[idx] = fm; 961 im[idx] = 0; 962 } 963 964 /* make sure diagonal entry is included */ 965 if (diagonal_fill && fill[prow] == -1) { 966 fm = n; 967 while (fill[fm] < prow) fm = fill[fm]; 968 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 969 fill[fm] = prow; 970 im[prow] = 0; 971 nzf++; 972 dcount++; 973 } 974 975 nzi = 0; 976 row = fill[n]; 977 while (row < prow) { 978 incrlev = im[row] + 1; 979 nz = dloc[row]; 980 xi = ajnew + ainew[row] + shift + nz + 1; 981 flev = ajfill + ainew[row] + shift + nz + 1; 982 nnz = ainew[row+1] - ainew[row] - nz - 1; 983 fm = row; 984 while (nnz-- > 0) { 985 idx = *xi++ + shift; 986 if (*flev + incrlev > levels) { 987 flev++; 988 continue; 989 } 990 do { 991 m = fm; 992 fm = fill[m]; 993 } while (fm < idx); 994 if (fm != idx) { 995 im[idx] = *flev + incrlev; 996 fill[m] = idx; 997 fill[idx] = fm; 998 fm = idx; 999 nzf++; 1000 } else { 1001 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 1002 } 1003 flev++; 1004 } 1005 row = fill[row]; 1006 nzi++; 1007 } 1008 /* copy new filled row into permanent storage */ 1009 ainew[prow+1] = ainew[prow] + nzf; 1010 if (ainew[prow+1] > jmax-shift) { 1011 1012 /* estimate how much additional space we will need */ 1013 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1014 /* just double the memory each time */ 1015 /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1016 int maxadd = jmax; 1017 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1018 jmax += maxadd; 1019 1020 /* allocate a longer ajnew and ajfill */ 1021 xi = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(xi); 1022 ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1023 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1024 ajnew = xi; 1025 xi = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(xi); 1026 ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1027 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1028 ajfill = xi; 1029 realloc++; /* count how many times we realloc */ 1030 } 1031 xi = ajnew + ainew[prow] + shift; 1032 flev = ajfill + ainew[prow] + shift; 1033 dloc[prow] = nzi; 1034 fm = fill[n]; 1035 while (nzf--) { 1036 *xi++ = fm - shift; 1037 *flev++ = im[fm]; 1038 fm = fill[fm]; 1039 } 1040 /* make sure row has diagonal entry */ 1041 if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 1042 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 1043 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1044 } 1045 } 1046 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1047 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1048 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1049 ierr = PetscFree(fill);CHKERRQ(ierr); 1050 ierr = PetscFree(im);CHKERRQ(ierr); 1051 1052 { 1053 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1054 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1055 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 1056 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 1057 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1058 if (diagonal_fill) { 1059 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount); 1060 } 1061 } 1062 1063 /* put together the new matrix */ 1064 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1065 PLogObjectParent(*fact,isicol); 1066 b = (Mat_SeqAIJ*)(*fact)->data; 1067 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1068 b->singlemalloc = PETSC_FALSE; 1069 len = (ainew[n] + shift)*sizeof(Scalar); 1070 /* the next line frees the default space generated by the Create() */ 1071 ierr = PetscFree(b->a);CHKERRQ(ierr); 1072 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1073 b->a = (Scalar*)PetscMalloc(len+1);CHKPTRQ(b->a); 1074 b->j = ajnew; 1075 b->i = ainew; 1076 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1077 b->diag = dloc; 1078 b->ilen = 0; 1079 b->imax = 0; 1080 b->row = isrow; 1081 b->col = iscol; 1082 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1083 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1084 b->icol = isicol; 1085 b->solve_work = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1086 /* In b structure: Free imax, ilen, old a, old j. 1087 Allocate dloc, solve_work, new a, new j */ 1088 PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 1089 b->maxnz = b->nz = ainew[n] + shift; 1090 (*fact)->factor = FACTOR_LU; 1091 1092 (*fact)->info.factor_mallocs = realloc; 1093 (*fact)->info.fill_ratio_given = f; 1094 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1095 (*fact)->factor = FACTOR_LU; 1096 1097 PetscFunctionReturn(0); 1098 } 1099 1100 1101 1102 1103