1 /*$Id: aijfact.c,v 1.151 2000/05/16 22:04:11 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 7 #undef __FUNC__ 8 #define __FUNC__ /*<a name=""></a>*/"MatOrdering_Flow_SeqAIJ" 9 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 10 { 11 PetscFunctionBegin; 12 13 SETERRQ(PETSC_ERR_SUP,0,"Code not written"); 14 #if !defined(PETSC_USE_DEBUG) 15 PetscFunctionReturn(0); 16 #endif 17 } 18 19 20 EXTERN int MatMarkDiagonal_SeqAIJ(Mat); 21 EXTERN int Mat_AIJ_CheckInode(Mat); 22 23 EXTERN int SPARSEKIT2dperm(int*,Scalar*,int*,int*,Scalar*,int*,int*,int*,int*,int*); 24 EXTERN int SPARSEKIT2ilutp(int*,Scalar*,int*,int*,int*,PetscReal*,PetscReal*,int*,Scalar*,int*,int*,int*,Scalar*,int*,int*,int*); 25 EXTERN int SPARSEKIT2msrcsr(int*,Scalar*,int*,Scalar*,int*,int*,Scalar*,int*); 26 27 #undef __FUNC__ 28 #define __FUNC__ /*<a name=""></a>*/"MatILUDTFactor_SeqAIJ" 29 /* ------------------------------------------------------------ 30 31 This interface was contribed by Tony Caola 32 33 This routine is an interface to the pivoting drop-tolerance 34 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 35 SPARSEKIT2. 36 37 The SPARSEKIT2 routines used here are covered by the GNU 38 copyright; see the file gnu in this directory. 39 40 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 41 help in getting this routine ironed out. 42 43 The major drawback to this routine is that if info->fill is 44 not large enough it fails rather than allocating more space; 45 this can be fixed by hacking/improving the f2c version of 46 Yousef Saad's code. 47 48 ishift = 0, for indices start at 1 49 ishift = 1, for indices starting at 0 50 ------------------------------------------------------------ 51 */ 52 53 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 54 { 55 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 56 IS iscolf,isicol,isirow; 57 PetscTruth reorder; 58 int *c,*r,*ic,ierr,i,n = a->m; 59 int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 60 int *ordcol,*iwk,*iperm,*jw; 61 int ishift = !a->indexshift; 62 int jmax,lfill,job,*o_i,*o_j; 63 Scalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 64 PetscReal permtol,af; 65 66 PetscFunctionBegin; 67 68 if (info->dt == PETSC_DEFAULT) info->dt = .005; 69 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax); 70 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 71 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*info->dtcount))/a->nz; 72 lfill = (int)(info->dtcount/2.0); 73 jmax = (int)(info->fill*a->nz); 74 permtol = info->dtcol; 75 76 77 /* ------------------------------------------------------------ 78 If reorder=.TRUE., then the original matrix has to be 79 reordered to reflect the user selected ordering scheme, and 80 then de-reordered so it is in it's original format. 81 Because Saad's dperm() is NOT in place, we have to copy 82 the original matrix and allocate more storage. . . 83 ------------------------------------------------------------ 84 */ 85 86 /* set reorder to true if either isrow or iscol is not identity */ 87 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 88 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 89 reorder = PetscNot(reorder); 90 91 92 /* storage for ilu factor */ 93 new_i = (int*) PetscMalloc((n+1)*sizeof(int));CHKPTRQ(new_i); 94 new_j = (int*) PetscMalloc(jmax*sizeof(int));CHKPTRQ(new_j); 95 new_a = (Scalar*)PetscMalloc(jmax*sizeof(Scalar));CHKPTRQ(new_a); 96 97 ordcol = (int*)PetscMalloc(n*sizeof(int));CHKPTRQ(ordcol); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 if (ishift) { 104 for (i=old_i[0];i<old_i[n];i++) { 105 old_j[i]++; 106 } 107 for(i=0;i<n+1;i++) { 108 old_i[i]++; 109 }; 110 }; 111 112 if (reorder) { 113 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 114 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 115 for(i=0;i<n;i++) { 116 r[i] = r[i]+1; 117 c[i] = c[i]+1; 118 } 119 old_i2 = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(old_i2); 120 old_j2 = (int*)PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int));CHKPTRQ(old_j2); 121 old_a2 = (Scalar*)PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar));CHKPTRQ(old_a2); 122 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 123 for (i=0;i<n;i++) { 124 r[i] = r[i]-1; 125 c[i] = c[i]-1; 126 } 127 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 128 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 iperm = (int*) PetscMalloc(2*n*sizeof(int));CHKPTRQ(iperm); 144 jw = (int*) PetscMalloc(2*n*sizeof(int));CHKPTRQ(jw); 145 w = (Scalar*)PetscMalloc(n*sizeof(Scalar));CHKPTRQ(w); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 148 if (ierr) { 149 switch (ierr) { 150 case -3: SETERRQ2(1,1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 151 case -2: SETERRQ2(1,1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 152 case -5: SETERRQ(1,1,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(1,1,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(1,1,"ilutp(), illegal info->fill value %d",jmax); 155 default: SETERRQ1(1,1,"ilutp(), zero pivot detected on row %d",ierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 wk = (Scalar*)PetscMalloc(n*sizeof(Scalar));CHKPTRQ(wk); 169 iwk = (int*) PetscMalloc((n+1)*sizeof(int));CHKPTRQ(iwk); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 if (ishift) { 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 } 196 197 /* Make the factored matrix 0-based */ 198 if (ishift) { 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 } 206 207 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 208 /*-- permute the right-hand-side and solution vectors --*/ 209 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 210 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 211 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 212 for(i=0; i<n; i++) { 213 ordcol[i] = ic[iperm[i]-1]; 214 }; 215 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 216 ierr = ISDestroy(isicol);CHKERRQ(ierr); 217 218 ierr = PetscFree(iperm);CHKERRQ(ierr); 219 220 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 221 ierr = PetscFree(ordcol);CHKERRQ(ierr); 222 223 /*----- put together the new matrix -----*/ 224 225 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 ierr = PetscFree(b->imax);CHKERRQ(ierr); 231 b->sorted = PETSC_FALSE; 232 b->singlemalloc = PETSC_FALSE; 233 /* the next line frees the default space generated by the MatCreate() */ 234 ierr = PetscFree(b->a);CHKERRQ(ierr); 235 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 236 b->a = new_a; 237 b->j = new_j; 238 b->i = new_i; 239 b->ilen = 0; 240 b->imax = 0; 241 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 242 b->row = isirow; 243 b->col = iscolf; 244 b->solve_work = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 245 b->maxnz = b->nz = new_i[n]; 246 b->indexshift = a->indexshift; 247 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 248 (*fact)->info.factor_mallocs = 0; 249 250 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 251 252 /* check out for identical nodes. If found, use inode functions */ 253 ierr = Mat_AIJ_CheckInode(*fact);CHKERRQ(ierr); 254 255 af = ((double)b->nz)/((double)a->nz) + .001; 256 PLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 257 PLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 258 PLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 259 PLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 260 261 PetscFunctionReturn(0); 262 } 263 264 /* 265 Factorization code for AIJ format. 266 */ 267 #undef __FUNC__ 268 #define __FUNC__ /*<a name="MatLUFactorSymbolic_SeqAIJ""></a>*/"MatLUFactorSymbolic_SeqAIJ" 269 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatLUInfo *info,Mat *B) 270 { 271 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 272 IS isicol; 273 int *r,*ic,ierr,i,n = a->m,*ai = a->i,*aj = a->j; 274 int *ainew,*ajnew,jmax,*fill,*ajtmp,nz,shift = a->indexshift; 275 int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 276 PetscReal f; 277 278 PetscFunctionBegin; 279 PetscValidHeaderSpecific(isrow,IS_COOKIE); 280 PetscValidHeaderSpecific(iscol,IS_COOKIE); 281 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square"); 282 283 if (!isrow) { 284 ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);CHKERRQ(ierr); 285 } 286 if (!iscol) { 287 ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);CHKERRQ(ierr); 288 } 289 290 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 291 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 292 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 293 294 /* get new row pointers */ 295 ainew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(ainew); 296 ainew[0] = -shift; 297 /* don't know how many column pointers are needed so estimate */ 298 if (info) f = info->fill; else f = 1.0; 299 jmax = (int)(f*ai[n]+(!shift)); 300 ajnew = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajnew); 301 /* fill is a linked list of nonzeros in active row */ 302 fill = (int*)PetscMalloc((2*n+1)*sizeof(int));CHKPTRQ(fill); 303 im = fill + n + 1; 304 /* idnew is location of diagonal in factor */ 305 idnew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(idnew); 306 idnew[0] = -shift; 307 308 for (i=0; i<n; i++) { 309 /* first copy previous fill into linked list */ 310 nnz = nz = ai[r[i]+1] - ai[r[i]]; 311 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 312 ajtmp = aj + ai[r[i]] + shift; 313 fill[n] = n; 314 while (nz--) { 315 fm = n; 316 idx = ic[*ajtmp++ + shift]; 317 do { 318 m = fm; 319 fm = fill[m]; 320 } while (fm < idx); 321 fill[m] = idx; 322 fill[idx] = fm; 323 } 324 row = fill[n]; 325 while (row < i) { 326 ajtmp = ajnew + idnew[row] + (!shift); 327 nzbd = 1 + idnew[row] - ainew[row]; 328 nz = im[row] - nzbd; 329 fm = row; 330 while (nz-- > 0) { 331 idx = *ajtmp++ + shift; 332 nzbd++; 333 if (idx == i) im[row] = nzbd; 334 do { 335 m = fm; 336 fm = fill[m]; 337 } while (fm < idx); 338 if (fm != idx) { 339 fill[m] = idx; 340 fill[idx] = fm; 341 fm = idx; 342 nnz++; 343 } 344 } 345 row = fill[row]; 346 } 347 /* copy new filled row into permanent storage */ 348 ainew[i+1] = ainew[i] + nnz; 349 if (ainew[i+1] > jmax) { 350 351 /* estimate how much additional space we will need */ 352 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 353 /* just double the memory each time */ 354 int maxadd = jmax; 355 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 356 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 357 jmax += maxadd; 358 359 /* allocate a longer ajnew */ 360 ajtmp = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(ajtmp); 361 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 362 ierr = PetscFree(ajnew);CHKERRQ(ierr); 363 ajnew = ajtmp; 364 realloc++; /* count how many times we realloc */ 365 } 366 ajtmp = ajnew + ainew[i] + shift; 367 fm = fill[n]; 368 nzi = 0; 369 im[i] = nnz; 370 while (nnz--) { 371 if (fm < i) nzi++; 372 *ajtmp++ = fm - shift; 373 fm = fill[fm]; 374 } 375 idnew[i] = ainew[i] + nzi; 376 } 377 if (ai[n] != 0) { 378 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 379 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 380 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 381 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 382 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 383 } else { 384 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 385 } 386 387 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 388 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 389 390 ierr = PetscFree(fill);CHKERRQ(ierr); 391 392 /* put together the new matrix */ 393 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 394 PLogObjectParent(*B,isicol); 395 b = (Mat_SeqAIJ*)(*B)->data; 396 ierr = PetscFree(b->imax);CHKERRQ(ierr); 397 b->singlemalloc = PETSC_FALSE; 398 /* the next line frees the default space generated by the Create() */ 399 ierr = PetscFree(b->a);CHKERRQ(ierr); 400 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 401 b->a = (Scalar*)PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a); 402 b->j = ajnew; 403 b->i = ainew; 404 b->diag = idnew; 405 b->ilen = 0; 406 b->imax = 0; 407 b->row = isrow; 408 b->col = iscol; 409 if (info) { 410 b->lu_damp = info->damp; 411 b->lu_damping = info->damping; 412 } else { 413 b->lu_damp = PETSC_FALSE; 414 b->lu_damping = 0.0; 415 } 416 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 417 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 418 b->icol = isicol; 419 b->solve_work = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 420 /* In b structure: Free imax, ilen, old a, old j. 421 Allocate idnew, solve_work, new a, new j */ 422 PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 423 b->maxnz = b->nz = ainew[n] + shift; 424 425 (*B)->factor = FACTOR_LU; 426 (*B)->info.factor_mallocs = realloc; 427 (*B)->info.fill_ratio_given = f; 428 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 429 430 if (ai[n] != 0) { 431 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 432 } else { 433 (*B)->info.fill_ratio_needed = 0.0; 434 } 435 PetscFunctionReturn(0); 436 } 437 /* ----------------------------------------------------------- */ 438 EXTERN int Mat_AIJ_CheckInode(Mat); 439 440 #undef __FUNC__ 441 #define __FUNC__ /*<a name="MatLUFactorNumeric_SeqAIJ"></a>*/"MatLUFactorNumeric_SeqAIJ" 442 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 443 { 444 Mat C = *B; 445 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 446 IS isrow = b->row,isicol = b->icol; 447 int *r,*ic,ierr,i,j,n = a->m,*ai = b->i,*aj = b->j; 448 int *ajtmpold,*ajtmp,nz,row,*ics,shift = a->indexshift; 449 int *diag_offset = b->diag,diag; 450 int *pj,ndamp = 0; 451 Scalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 452 PetscReal damping = b->lu_damping; 453 PetscTruth damp; 454 455 PetscFunctionBegin; 456 457 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 458 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 459 rtmp = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(rtmp); 460 ierr = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr); 461 rtmps = rtmp + shift; ics = ic + shift; 462 463 do { 464 damp = PETSC_FALSE; 465 for (i=0; i<n; i++) { 466 nz = ai[i+1] - ai[i]; 467 ajtmp = aj + ai[i] + shift; 468 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 469 470 /* load in initial (unfactored row) */ 471 nz = a->i[r[i]+1] - a->i[r[i]]; 472 ajtmpold = a->j + a->i[r[i]] + shift; 473 v = a->a + a->i[r[i]] + shift; 474 for (j=0; j<nz; j++) { 475 rtmp[ics[ajtmpold[j]]] = v[j]; 476 if (ajtmpold[j] == r[i]) { /* damp the diagonal of the matrix */ 477 rtmp[ics[ajtmpold[j]]] += damping; 478 } 479 } 480 481 row = *ajtmp++ + shift; 482 while (row < i) { 483 pc = rtmp + row; 484 if (*pc != 0.0) { 485 pv = b->a + diag_offset[row] + shift; 486 pj = b->j + diag_offset[row] + (!shift); 487 multiplier = *pc / *pv++; 488 *pc = multiplier; 489 nz = ai[row+1] - diag_offset[row] - 1; 490 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 491 PLogFlops(2*nz); 492 } 493 row = *ajtmp++ + shift; 494 } 495 /* finished row so stick it into b->a */ 496 pv = b->a + ai[i] + shift; 497 pj = b->j + ai[i] + shift; 498 nz = ai[i+1] - ai[i]; 499 for (j=0; j<nz; j++) {pv[j] = rtmps[pj[j]];} 500 diag = diag_offset[i] - ai[i]; 501 if (PetscAbsScalar(pv[diag]) < 1.e-12) { 502 if (b->lu_damp) { 503 damp = PETSC_TRUE; 504 if (damping) damping *= 2.0; 505 else damping = 1.e-12; 506 ndamp++; 507 break; 508 } else { 509 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i); 510 } 511 } 512 } 513 } while (damp); 514 515 /* invert diagonal entries for simplier triangular solves */ 516 for (i=0; i<n; i++) { 517 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 518 } 519 520 ierr = PetscFree(rtmp);CHKERRQ(ierr); 521 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 522 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 523 C->factor = FACTOR_LU; 524 ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr); 525 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 526 C->assembled = PETSC_TRUE; 527 PLogFlops(b->n); 528 if (ndamp) { 529 PLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %g\n",ndamp,damping); 530 } 531 PetscFunctionReturn(0); 532 } 533 /* ----------------------------------------------------------- */ 534 #undef __FUNC__ 535 #define __FUNC__ /*<a name="MatLUFactor_SeqAIJ"></a>*/"MatLUFactor_SeqAIJ" 536 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatLUInfo *info) 537 { 538 Mat_SeqAIJ *mat = (Mat_SeqAIJ*)A->data; 539 int ierr,refct; 540 Mat C; 541 PetscOps *Abops; 542 MatOps Aops; 543 544 PetscFunctionBegin; 545 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 546 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 547 548 /* free all the data structures from mat */ 549 ierr = PetscFree(mat->a);CHKERRQ(ierr); 550 if (!mat->singlemalloc) { 551 ierr = PetscFree(mat->i);CHKERRQ(ierr); 552 ierr = PetscFree(mat->j);CHKERRQ(ierr); 553 } 554 if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);} 555 if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);} 556 if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);} 557 if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);} 558 if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);} 559 if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);} 560 ierr = PetscFree(mat);CHKERRQ(ierr); 561 562 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 563 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 564 565 /* 566 This is horrible, horrible code. We need to keep the 567 A pointers for the bops and ops but copy everything 568 else from C. 569 */ 570 Abops = A->bops; 571 Aops = A->ops; 572 refct = A->refct; 573 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 574 mat = (Mat_SeqAIJ*)A->data; 575 PLogObjectParent(A,mat->icol); 576 577 A->bops = Abops; 578 A->ops = Aops; 579 A->qlist = 0; 580 A->refct = refct; 581 /* copy over the type_name and name */ 582 ierr = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr); 583 ierr = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr); 584 585 PetscHeaderDestroy(C); 586 587 PetscFunctionReturn(0); 588 } 589 /* ----------------------------------------------------------- */ 590 #undef __FUNC__ 591 #define __FUNC__ /*<a name=""></a>*/"MatSolve_SeqAIJ" 592 int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 593 { 594 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 595 IS iscol = a->col,isrow = a->row; 596 int *r,*c,ierr,i, n = a->m,*vi,*ai = a->i,*aj = a->j; 597 int nz,shift = a->indexshift,*rout,*cout; 598 Scalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 599 600 PetscFunctionBegin; 601 if (!n) PetscFunctionReturn(0); 602 603 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 604 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 605 tmp = a->solve_work; 606 607 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 608 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 609 610 /* forward solve the lower triangular */ 611 tmp[0] = b[*r++]; 612 tmps = tmp + shift; 613 for (i=1; i<n; i++) { 614 v = aa + ai[i] + shift; 615 vi = aj + ai[i] + shift; 616 nz = a->diag[i] - ai[i]; 617 sum = b[*r++]; 618 while (nz--) sum -= *v++ * tmps[*vi++]; 619 tmp[i] = sum; 620 } 621 622 /* backward solve the upper triangular */ 623 for (i=n-1; i>=0; i--){ 624 v = aa + a->diag[i] + (!shift); 625 vi = aj + a->diag[i] + (!shift); 626 nz = ai[i+1] - a->diag[i] - 1; 627 sum = tmp[i]; 628 while (nz--) sum -= *v++ * tmps[*vi++]; 629 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 630 } 631 632 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 633 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 634 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 635 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 636 PLogFlops(2*a->nz - a->n); 637 PetscFunctionReturn(0); 638 } 639 640 /* ----------------------------------------------------------- */ 641 #undef __FUNC__ 642 #define __FUNC__ /*<a name=""></a>*/"MatSolve_SeqAIJ_NaturalOrdering" 643 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 644 { 645 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 646 int n = a->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr; 647 Scalar *x,*b,*aa = a->a,sum; 648 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 649 int adiag_i,i,*vi,nz,ai_i; 650 Scalar *v; 651 #endif 652 653 PetscFunctionBegin; 654 if (!n) PetscFunctionReturn(0); 655 if (a->indexshift) { 656 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 657 PetscFunctionReturn(0); 658 } 659 660 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 661 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 662 663 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 664 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 665 #else 666 /* forward solve the lower triangular */ 667 x[0] = b[0]; 668 for (i=1; i<n; i++) { 669 ai_i = ai[i]; 670 v = aa + ai_i; 671 vi = aj + ai_i; 672 nz = adiag[i] - ai_i; 673 sum = b[i]; 674 while (nz--) sum -= *v++ * x[*vi++]; 675 x[i] = sum; 676 } 677 678 /* backward solve the upper triangular */ 679 for (i=n-1; i>=0; i--){ 680 adiag_i = adiag[i]; 681 v = aa + adiag_i + 1; 682 vi = aj + adiag_i + 1; 683 nz = ai[i+1] - adiag_i - 1; 684 sum = x[i]; 685 while (nz--) sum -= *v++ * x[*vi++]; 686 x[i] = sum*aa[adiag_i]; 687 } 688 #endif 689 PLogFlops(2*a->nz - a->n); 690 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 691 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNC__ 696 #define __FUNC__ /*<a name=""></a>*/"MatSolveAdd_SeqAIJ" 697 int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 698 { 699 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 700 IS iscol = a->col,isrow = a->row; 701 int *r,*c,ierr,i, n = a->m,*vi,*ai = a->i,*aj = a->j; 702 int nz,shift = a->indexshift,*rout,*cout; 703 Scalar *x,*b,*tmp,*aa = a->a,sum,*v; 704 705 PetscFunctionBegin; 706 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 707 708 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 709 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 710 tmp = a->solve_work; 711 712 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 713 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 714 715 /* forward solve the lower triangular */ 716 tmp[0] = b[*r++]; 717 for (i=1; i<n; i++) { 718 v = aa + ai[i] + shift; 719 vi = aj + ai[i] + shift; 720 nz = a->diag[i] - ai[i]; 721 sum = b[*r++]; 722 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 723 tmp[i] = sum; 724 } 725 726 /* backward solve the upper triangular */ 727 for (i=n-1; i>=0; i--){ 728 v = aa + a->diag[i] + (!shift); 729 vi = aj + a->diag[i] + (!shift); 730 nz = ai[i+1] - a->diag[i] - 1; 731 sum = tmp[i]; 732 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 733 tmp[i] = sum*aa[a->diag[i]+shift]; 734 x[*c--] += tmp[i]; 735 } 736 737 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 738 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 739 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 740 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 741 PLogFlops(2*a->nz); 742 743 PetscFunctionReturn(0); 744 } 745 /* -------------------------------------------------------------------*/ 746 #undef __FUNC__ 747 #define __FUNC__ /*<a name=""></a>*/"MatSolveTranspose_SeqAIJ" 748 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 749 { 750 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 751 IS iscol = a->col,isrow = a->row; 752 int *r,*c,ierr,i,n = a->m,*vi,*ai = a->i,*aj = a->j; 753 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 754 Scalar *x,*b,*tmp,*aa = a->a,*v,s1; 755 756 PetscFunctionBegin; 757 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 758 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 759 tmp = a->solve_work; 760 761 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 762 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 763 764 /* copy the b into temp work space according to permutation */ 765 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 766 767 /* forward solve the U^T */ 768 for (i=0; i<n; i++) { 769 v = aa + diag[i] + shift; 770 vi = aj + diag[i] + (!shift); 771 nz = ai[i+1] - diag[i] - 1; 772 s1 = tmp[i]; 773 s1 *= *(v++); /* multiply by inverse of diagonal entry */ 774 while (nz--) { 775 tmp[*vi++ + shift] -= (*v++)*s1; 776 } 777 tmp[i] = s1; 778 } 779 780 /* backward solve the L^T */ 781 for (i=n-1; i>=0; i--){ 782 v = aa + diag[i] - 1 + shift; 783 vi = aj + diag[i] - 1 + shift; 784 nz = diag[i] - ai[i]; 785 s1 = tmp[i]; 786 while (nz--) { 787 tmp[*vi-- + shift] -= (*v--)*s1; 788 } 789 } 790 791 /* copy tmp into x according to permutation */ 792 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 793 794 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 795 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 796 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 797 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 798 799 PLogFlops(2*a->nz-a->n); 800 PetscFunctionReturn(0); 801 } 802 803 #undef __FUNC__ 804 #define __FUNC__ /*<a name=""></a>*/"MatSolveTransposeAdd_SeqAIJ" 805 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 806 { 807 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 808 IS iscol = a->col,isrow = a->row; 809 int *r,*c,ierr,i,n = a->m,*vi,*ai = a->i,*aj = a->j; 810 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 811 Scalar *x,*b,*tmp,*aa = a->a,*v; 812 813 PetscFunctionBegin; 814 if (zz != xx) VecCopy(zz,xx); 815 816 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 817 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 818 tmp = a->solve_work; 819 820 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 821 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 822 823 /* copy the b into temp work space according to permutation */ 824 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 825 826 /* forward solve the U^T */ 827 for (i=0; i<n; i++) { 828 v = aa + diag[i] + shift; 829 vi = aj + diag[i] + (!shift); 830 nz = ai[i+1] - diag[i] - 1; 831 tmp[i] *= *v++; 832 while (nz--) { 833 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 834 } 835 } 836 837 /* backward solve the L^T */ 838 for (i=n-1; i>=0; i--){ 839 v = aa + diag[i] - 1 + shift; 840 vi = aj + diag[i] - 1 + shift; 841 nz = diag[i] - ai[i]; 842 while (nz--) { 843 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 844 } 845 } 846 847 /* copy tmp into x according to permutation */ 848 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 849 850 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 851 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 852 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 853 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 854 855 PLogFlops(2*a->nz); 856 PetscFunctionReturn(0); 857 } 858 /* ----------------------------------------------------------------*/ 859 EXTERN int MatMissingDiagonal_SeqAIJ(Mat); 860 861 #undef __FUNC__ 862 #define __FUNC__ /*<a name=""></a>*/"MatILUFactorSymbolic_SeqAIJ" 863 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 864 { 865 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 866 IS isicol; 867 int *r,*ic,ierr,prow,n = a->m,*ai = a->i,*aj = a->j; 868 int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 869 int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0; 870 int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 871 PetscTruth col_identity,row_identity; 872 PetscReal f; 873 874 PetscFunctionBegin; 875 if (info) { 876 f = info->fill; 877 levels = (int)info->levels; 878 diagonal_fill = (int)info->diagonal_fill; 879 } else { 880 f = 1.0; 881 levels = 0; 882 diagonal_fill = 0; 883 } 884 if (!isrow) { 885 ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);CHKERRQ(ierr); 886 } 887 if (!iscol) { 888 ierr = ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);CHKERRQ(ierr); 889 } 890 891 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 892 893 /* special case that simply copies fill pattern */ 894 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 895 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 896 if (!levels && row_identity && col_identity) { 897 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 898 (*fact)->factor = FACTOR_LU; 899 b = (Mat_SeqAIJ*)(*fact)->data; 900 if (!b->diag) { 901 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 902 } 903 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 904 b->row = isrow; 905 b->col = iscol; 906 b->icol = isicol; 907 b->solve_work = (Scalar*)PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 908 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 909 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 910 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 911 PetscFunctionReturn(0); 912 } 913 914 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 915 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 916 917 /* get new row pointers */ 918 ainew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(ainew); 919 ainew[0] = -shift; 920 /* don't know how many column pointers are needed so estimate */ 921 jmax = (int)(f*(ai[n]+!shift)); 922 ajnew = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajnew); 923 /* ajfill is level of fill for each fill entry */ 924 ajfill = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajfill); 925 /* fill is a linked list of nonzeros in active row */ 926 fill = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(fill); 927 /* im is level for each filled value */ 928 im = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(im); 929 /* dloc is location of diagonal in factor */ 930 dloc = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(dloc); 931 dloc[0] = 0; 932 for (prow=0; prow<n; prow++) { 933 934 /* copy current row into linked list */ 935 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 936 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 937 xi = aj + ai[r[prow]] + shift; 938 fill[n] = n; 939 fill[prow] = -1; /* marker to indicate if diagonal exists */ 940 while (nz--) { 941 fm = n; 942 idx = ic[*xi++ + shift]; 943 do { 944 m = fm; 945 fm = fill[m]; 946 } while (fm < idx); 947 fill[m] = idx; 948 fill[idx] = fm; 949 im[idx] = 0; 950 } 951 952 /* make sure diagonal entry is included */ 953 if (diagonal_fill && fill[prow] == -1) { 954 fm = n; 955 while (fill[fm] < prow) fm = fill[fm]; 956 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 957 fill[fm] = prow; 958 im[prow] = 0; 959 nzf++; 960 dcount++; 961 } 962 963 nzi = 0; 964 row = fill[n]; 965 while (row < prow) { 966 incrlev = im[row] + 1; 967 nz = dloc[row]; 968 xi = ajnew + ainew[row] + shift + nz + 1; 969 flev = ajfill + ainew[row] + shift + nz + 1; 970 nnz = ainew[row+1] - ainew[row] - nz - 1; 971 fm = row; 972 while (nnz-- > 0) { 973 idx = *xi++ + shift; 974 if (*flev + incrlev > levels) { 975 flev++; 976 continue; 977 } 978 do { 979 m = fm; 980 fm = fill[m]; 981 } while (fm < idx); 982 if (fm != idx) { 983 im[idx] = *flev + incrlev; 984 fill[m] = idx; 985 fill[idx] = fm; 986 fm = idx; 987 nzf++; 988 } else { 989 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 990 } 991 flev++; 992 } 993 row = fill[row]; 994 nzi++; 995 } 996 /* copy new filled row into permanent storage */ 997 ainew[prow+1] = ainew[prow] + nzf; 998 if (ainew[prow+1] > jmax-shift) { 999 1000 /* estimate how much additional space we will need */ 1001 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 1002 /* just double the memory each time */ 1003 /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 1004 int maxadd = jmax; 1005 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1006 jmax += maxadd; 1007 1008 /* allocate a longer ajnew and ajfill */ 1009 xi = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(xi); 1010 ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1011 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1012 ajnew = xi; 1013 xi = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(xi); 1014 ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1015 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1016 ajfill = xi; 1017 realloc++; /* count how many times we realloc */ 1018 } 1019 xi = ajnew + ainew[prow] + shift; 1020 flev = ajfill + ainew[prow] + shift; 1021 dloc[prow] = nzi; 1022 fm = fill[n]; 1023 while (nzf--) { 1024 *xi++ = fm - shift; 1025 *flev++ = im[fm]; 1026 fm = fill[fm]; 1027 } 1028 /* make sure row has diagonal entry */ 1029 if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 1030 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 1031 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1032 } 1033 } 1034 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1035 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1036 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1037 ierr = PetscFree(fill);CHKERRQ(ierr); 1038 ierr = PetscFree(im);CHKERRQ(ierr); 1039 1040 { 1041 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1042 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1043 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 1044 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 1045 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1046 if (diagonal_fill) { 1047 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount); 1048 } 1049 } 1050 1051 /* put together the new matrix */ 1052 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1053 PLogObjectParent(*fact,isicol); 1054 b = (Mat_SeqAIJ*)(*fact)->data; 1055 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1056 b->singlemalloc = PETSC_FALSE; 1057 len = (ainew[n] + shift)*sizeof(Scalar); 1058 /* the next line frees the default space generated by the Create() */ 1059 ierr = PetscFree(b->a);CHKERRQ(ierr); 1060 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1061 b->a = (Scalar*)PetscMalloc(len+1);CHKPTRQ(b->a); 1062 b->j = ajnew; 1063 b->i = ainew; 1064 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1065 b->diag = dloc; 1066 b->ilen = 0; 1067 b->imax = 0; 1068 b->row = isrow; 1069 b->col = iscol; 1070 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1071 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1072 b->icol = isicol; 1073 b->solve_work = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1074 /* In b structure: Free imax, ilen, old a, old j. 1075 Allocate dloc, solve_work, new a, new j */ 1076 PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 1077 b->maxnz = b->nz = ainew[n] + shift; 1078 (*fact)->factor = FACTOR_LU; 1079 1080 (*fact)->info.factor_mallocs = realloc; 1081 (*fact)->info.fill_ratio_given = f; 1082 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1083 (*fact)->factor = FACTOR_LU; 1084 1085 PetscFunctionReturn(0); 1086 } 1087 1088 1089 1090 1091