1 /*$Id: aijfact.c,v 1.140 1999/12/19 00:47:09 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 7 #undef __FUNC__ 8 #define __FUNC__ "MatOrdering_Flow_SeqAIJ" 9 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 10 { 11 PetscFunctionBegin; 12 13 SETERRQ(PETSC_ERR_SUP,0,"Code not written"); 14 #if !defined(PETSC_USE_DEBUG) 15 PetscFunctionReturn(0); 16 #endif 17 } 18 19 20 extern int MatMarkDiagonal_SeqAIJ(Mat); 21 extern int Mat_AIJ_CheckInode(Mat); 22 23 extern int SPARSEKIT2dperm(int*,Scalar*,int*,int*,Scalar*,int*,int*,int*,int*,int*); 24 extern int SPARSEKIT2ilutp(int*,Scalar*,int*,int*,int*,PetscReal*,PetscReal*,int*,Scalar*,int*,int*,int*,Scalar*,int*,int*,int*); 25 extern int SPARSEKIT2msrcsr(int*,Scalar*,int*,Scalar*,int*,int*,Scalar*,int*); 26 27 #undef __FUNC__ 28 #define __FUNC__ "MatILUDTFactor_SeqAIJ" 29 /* ------------------------------------------------------------ 30 31 This interface was contribed by Tony Caola 32 33 This routine is an interface to the pivoting drop-tolerance 34 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 35 SPARSEKIT2. 36 37 The SPARSEKIT2 routines used here are covered by the GNU 38 copyright; see the file gnu in this directory. 39 40 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 41 help in getting this routine ironed out. 42 43 The major drawback to this routine is that if info->fill is 44 not large enough it fails rather than allocating more space; 45 this can be fixed by hacking/improving the f2c version of 46 Yousef Saad's code. 47 48 ishift = 0, for indices start at 1 49 ishift = 1, for indices starting at 0 50 ------------------------------------------------------------ 51 */ 52 53 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 54 { 55 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 56 IS iscolf,isicol,isirow; 57 PetscTruth reorder; 58 int *c,*r,*ic,ierr,i,n = a->m; 59 int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 60 int *ordcol,*iwk,*iperm,*jw; 61 int ishift = !a->indexshift; 62 int jmax,lfill,job,*o_i,*o_j; 63 Scalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 64 PetscReal permtol,af; 65 66 PetscFunctionBegin; 67 68 if (info->dt == PETSC_DEFAULT) info->dt = .005; 69 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax); 70 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 71 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*info->dtcount))/a->nz; 72 lfill = (int)(info->dtcount/2.0); 73 jmax = (int)(info->fill*a->nz); 74 permtol = info->dtcol; 75 76 77 /* ------------------------------------------------------------ 78 If reorder=.TRUE., then the original matrix has to be 79 reordered to reflect the user selected ordering scheme, and 80 then de-reordered so it is in it's original format. 81 Because Saad's dperm() is NOT in place, we have to copy 82 the original matrix and allocate more storage. . . 83 ------------------------------------------------------------ 84 */ 85 86 /* set reorder to true if either isrow or iscol is not identity */ 87 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 88 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 89 reorder = PetscNot(reorder); 90 91 92 /* storage for ilu factor */ 93 new_i = (int*) PetscMalloc((n+1)*sizeof(int));CHKPTRQ(new_i); 94 new_j = (int*) PetscMalloc(jmax*sizeof(int));CHKPTRQ(new_j); 95 new_a = (Scalar*)PetscMalloc(jmax*sizeof(Scalar));CHKPTRQ(new_a); 96 97 ordcol = (int*)PetscMalloc(n*sizeof(int));CHKPTRQ(ordcol); 98 99 /* ------------------------------------------------------------ 100 Make sure that everything is Fortran formatted (1-Based) 101 ------------------------------------------------------------ 102 */ 103 if (ishift) { 104 for (i=old_i[0];i<old_i[n];i++) { 105 old_j[i]++; 106 } 107 for(i=0;i<n+1;i++) { 108 old_i[i]++; 109 }; 110 }; 111 112 if (reorder) { 113 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 114 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 115 for(i=0;i<n;i++) { 116 r[i] = r[i]+1; 117 c[i] = c[i]+1; 118 } 119 old_i2 = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(old_i2); 120 old_j2 = (int*)PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int));CHKPTRQ(old_j2); 121 old_a2 = (Scalar*)PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar));CHKPTRQ(old_a2); 122 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 123 for (i=0;i<n;i++) { 124 r[i] = r[i]-1; 125 c[i] = c[i]-1; 126 } 127 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 128 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 iperm = (int*) PetscMalloc(2*n*sizeof(int));CHKPTRQ(iperm); 144 jw = (int*) PetscMalloc(2*n*sizeof(int));CHKPTRQ(jw); 145 w = (Scalar*)PetscMalloc(n*sizeof(Scalar));CHKPTRQ(w); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 148 if (ierr) { 149 switch (ierr) { 150 case -3: SETERRQ1(1,1,"ilutp(), matrix U overflows, need larger info->fill value %d",jmax); 151 case -2: SETERRQ1(1,1,"ilutp(), matrix L overflows, need larger info->fill value %d",jmax); 152 case -5: SETERRQ(1,1,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(1,1,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(1,1,"ilutp(), illegal info->fill value %d",jmax); 155 default: SETERRQ1(1,1,"ilutp(), zero pivot detected on row %d",ierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 wk = (Scalar*)PetscMalloc(n*sizeof(Scalar));CHKPTRQ(wk); 169 iwk = (int*) PetscMalloc((n+1)*sizeof(int));CHKPTRQ(iwk); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<=old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 if (ishift) { 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 } 196 197 /* Make the factored matrix 0-based */ 198 if (ishift) { 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 } 206 207 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 208 /*-- permute the right-hand-side and solution vectors --*/ 209 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 210 ierr = ISInvertPermutation(isrow,&isirow);CHKERRQ(ierr); 211 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 212 for(i=0; i<n; i++) { 213 ordcol[i] = ic[iperm[i]-1]; 214 }; 215 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 216 ierr = ISDestroy(isicol);CHKERRQ(ierr); 217 218 ierr = PetscFree(iperm);CHKERRQ(ierr); 219 220 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 221 ierr = PetscFree(ordcol);CHKERRQ(ierr); 222 223 /*----- put together the new matrix -----*/ 224 225 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 ierr = PetscFree(b->imax);CHKERRQ(ierr); 231 b->sorted = PETSC_FALSE; 232 b->singlemalloc = PETSC_FALSE; 233 /* the next line frees the default space generated by the MatCreate() */ 234 ierr = PetscFree(b->a);CHKERRQ(ierr); 235 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 236 b->a = new_a; 237 b->j = new_j; 238 b->i = new_i; 239 b->ilen = 0; 240 b->imax = 0; 241 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 242 b->row = isirow; 243 b->col = iscolf; 244 b->solve_work = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 245 b->maxnz = b->nz = new_i[n]; 246 b->indexshift = a->indexshift; 247 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 248 (*fact)->info.factor_mallocs = 0; 249 250 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 251 252 /* check out for identical nodes. If found, use inode functions */ 253 ierr = Mat_AIJ_CheckInode(*fact);CHKERRQ(ierr); 254 255 af = ((double)b->nz)/((double)a->nz) + .001; 256 PLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 257 PLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 258 PLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 259 PLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 260 261 PetscFunctionReturn(0); 262 } 263 264 /* 265 Factorization code for AIJ format. 266 */ 267 #undef __FUNC__ 268 #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ" 269 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,PetscReal f,Mat *B) 270 { 271 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 272 IS isicol; 273 int *r,*ic,ierr,i,n = a->m,*ai = a->i,*aj = a->j; 274 int *ainew,*ajnew,jmax,*fill,*ajtmp,nz,shift = a->indexshift; 275 int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 276 277 PetscFunctionBegin; 278 PetscValidHeaderSpecific(isrow,IS_COOKIE); 279 PetscValidHeaderSpecific(iscol,IS_COOKIE); 280 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,0,"matrix must be square"); 281 282 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 283 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 284 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 285 286 /* get new row pointers */ 287 ainew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(ainew); 288 ainew[0] = -shift; 289 /* don't know how many column pointers are needed so estimate */ 290 jmax = (int)(f*ai[n]+(!shift)); 291 ajnew = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajnew); 292 /* fill is a linked list of nonzeros in active row */ 293 fill = (int*)PetscMalloc((2*n+1)*sizeof(int));CHKPTRQ(fill); 294 im = fill + n + 1; 295 /* idnew is location of diagonal in factor */ 296 idnew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(idnew); 297 idnew[0] = -shift; 298 299 for (i=0; i<n; i++) { 300 /* first copy previous fill into linked list */ 301 nnz = nz = ai[r[i]+1] - ai[r[i]]; 302 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 303 ajtmp = aj + ai[r[i]] + shift; 304 fill[n] = n; 305 while (nz--) { 306 fm = n; 307 idx = ic[*ajtmp++ + shift]; 308 do { 309 m = fm; 310 fm = fill[m]; 311 } while (fm < idx); 312 fill[m] = idx; 313 fill[idx] = fm; 314 } 315 row = fill[n]; 316 while (row < i) { 317 ajtmp = ajnew + idnew[row] + (!shift); 318 nzbd = 1 + idnew[row] - ainew[row]; 319 nz = im[row] - nzbd; 320 fm = row; 321 while (nz-- > 0) { 322 idx = *ajtmp++ + shift; 323 nzbd++; 324 if (idx == i) im[row] = nzbd; 325 do { 326 m = fm; 327 fm = fill[m]; 328 } while (fm < idx); 329 if (fm != idx) { 330 fill[m] = idx; 331 fill[idx] = fm; 332 fm = idx; 333 nnz++; 334 } 335 } 336 row = fill[row]; 337 } 338 /* copy new filled row into permanent storage */ 339 ainew[i+1] = ainew[i] + nnz; 340 if (ainew[i+1] > jmax) { 341 342 /* estimate how much additional space we will need */ 343 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 344 /* just double the memory each time */ 345 int maxadd = jmax; 346 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 347 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 348 jmax += maxadd; 349 350 /* allocate a longer ajnew */ 351 ajtmp = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(ajtmp); 352 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 353 ierr = PetscFree(ajnew);CHKERRQ(ierr); 354 ajnew = ajtmp; 355 realloc++; /* count how many times we realloc */ 356 } 357 ajtmp = ajnew + ainew[i] + shift; 358 fm = fill[n]; 359 nzi = 0; 360 im[i] = nnz; 361 while (nnz--) { 362 if (fm < i) nzi++; 363 *ajtmp++ = fm - shift; 364 fm = fill[fm]; 365 } 366 idnew[i] = ainew[i] + nzi; 367 } 368 if (ai[n] != 0) { 369 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 370 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 371 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 372 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 373 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 374 } else { 375 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 376 } 377 378 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 379 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 380 381 ierr = PetscFree(fill);CHKERRQ(ierr); 382 383 /* put together the new matrix */ 384 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 385 PLogObjectParent(*B,isicol); 386 b = (Mat_SeqAIJ*)(*B)->data; 387 ierr = PetscFree(b->imax);CHKERRQ(ierr); 388 b->singlemalloc = PETSC_FALSE; 389 /* the next line frees the default space generated by the Create() */ 390 ierr = PetscFree(b->a);CHKERRQ(ierr); 391 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 392 b->a = (Scalar*)PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a); 393 b->j = ajnew; 394 b->i = ainew; 395 b->diag = idnew; 396 b->ilen = 0; 397 b->imax = 0; 398 b->row = isrow; 399 b->col = iscol; 400 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 401 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 402 b->icol = isicol; 403 b->solve_work = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 404 /* In b structure: Free imax, ilen, old a, old j. 405 Allocate idnew, solve_work, new a, new j */ 406 PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 407 b->maxnz = b->nz = ainew[n] + shift; 408 409 (*B)->factor = FACTOR_LU; 410 (*B)->info.factor_mallocs = realloc; 411 (*B)->info.fill_ratio_given = f; 412 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant if A has inodes */ 413 414 if (ai[n] != 0) { 415 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 416 } else { 417 (*B)->info.fill_ratio_needed = 0.0; 418 } 419 PetscFunctionReturn(0); 420 } 421 /* ----------------------------------------------------------- */ 422 extern int Mat_AIJ_CheckInode(Mat); 423 424 #undef __FUNC__ 425 #define __FUNC__ "MatLUFactorNumeric_SeqAIJ" 426 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 427 { 428 Mat C = *B; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 430 IS isrow = b->row,isicol = b->icol; 431 int *r,*ic,ierr,i,j,n = a->m,*ai = b->i,*aj = b->j; 432 int *ajtmpold,*ajtmp,nz,row,*ics,shift = a->indexshift; 433 int *diag_offset = b->diag,diag,k; 434 int preserve_row_sums = (int)a->ilu_preserve_row_sums,*pj; 435 Scalar *rtmp,*v,*pc,multiplier,sum,inner_sum,*rowsums = 0,*pv,*rtmps,*u_values; 436 PetscReal ssum; 437 438 PetscFunctionBegin; 439 440 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 441 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 442 rtmp = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(rtmp); 443 ierr = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr); 444 rtmps = rtmp + shift; ics = ic + shift; 445 446 /* precalculate row sums */ 447 if (preserve_row_sums) { 448 rowsums = (Scalar*)PetscMalloc(n*sizeof(Scalar));CHKPTRQ(rowsums); 449 for (i=0; i<n; i++) { 450 nz = a->i[r[i]+1] - a->i[r[i]]; 451 v = a->a + a->i[r[i]] + shift; 452 sum = 0.0; 453 for (j=0; j<nz; j++) sum += v[j]; 454 rowsums[i] = sum; 455 } 456 } 457 458 for (i=0; i<n; i++) { 459 nz = ai[i+1] - ai[i]; 460 ajtmp = aj + ai[i] + shift; 461 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 462 463 /* load in initial (unfactored row) */ 464 nz = a->i[r[i]+1] - a->i[r[i]]; 465 ajtmpold = a->j + a->i[r[i]] + shift; 466 v = a->a + a->i[r[i]] + shift; 467 for (j=0; j<nz; j++) rtmp[ics[ajtmpold[j]]] = v[j]; 468 469 row = *ajtmp++ + shift; 470 while (row < i) { 471 pc = rtmp + row; 472 if (*pc != 0.0) { 473 pv = b->a + diag_offset[row] + shift; 474 pj = b->j + diag_offset[row] + (!shift); 475 multiplier = *pc / *pv++; 476 *pc = multiplier; 477 nz = ai[row+1] - diag_offset[row] - 1; 478 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 479 PLogFlops(2*nz); 480 } 481 row = *ajtmp++ + shift; 482 } 483 /* finished row so stick it into b->a */ 484 pv = b->a + ai[i] + shift; 485 pj = b->j + ai[i] + shift; 486 nz = ai[i+1] - ai[i]; 487 for (j=0; j<nz; j++) {pv[j] = rtmps[pj[j]];} 488 diag = diag_offset[i] - ai[i]; 489 /* 490 Possibly adjust diagonal entry on current row to force 491 LU matrix to have same row sum as initial matrix. 492 */ 493 if (pv[diag] == 0.0) { 494 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i); 495 } 496 if (preserve_row_sums) { 497 pj = b->j + ai[i] + shift; 498 sum = rowsums[i]; 499 for (j=0; j<diag; j++) { 500 u_values = b->a + diag_offset[pj[j]] + shift; 501 nz = ai[pj[j]+1] - diag_offset[pj[j]]; 502 inner_sum = 0.0; 503 for (k=0; k<nz; k++) { 504 inner_sum += u_values[k]; 505 } 506 sum -= pv[j]*inner_sum; 507 508 } 509 nz = ai[i+1] - diag_offset[i] - 1; 510 u_values = b->a + diag_offset[i] + 1 + shift; 511 for (k=0; k<nz; k++) { 512 sum -= u_values[k]; 513 } 514 ssum = PetscAbsScalar(sum/pv[diag]); 515 if (ssum < 1000. && ssum > .001) pv[diag] = sum; 516 } 517 /* check pivot entry for current row */ 518 } 519 520 /* invert diagonal entries for simplier triangular solves */ 521 for (i=0; i<n; i++) { 522 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 523 } 524 525 if (preserve_row_sums) {ierr = PetscFree(rowsums);CHKERRQ(ierr);} 526 ierr = PetscFree(rtmp);CHKERRQ(ierr); 527 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 528 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 529 C->factor = FACTOR_LU; 530 ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr); 531 C->assembled = PETSC_TRUE; 532 PLogFlops(b->n); 533 PetscFunctionReturn(0); 534 } 535 /* ----------------------------------------------------------- */ 536 #undef __FUNC__ 537 #define __FUNC__ "MatLUFactor_SeqAIJ" 538 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,PetscReal f) 539 { 540 Mat_SeqAIJ *mat = (Mat_SeqAIJ*)A->data; 541 int ierr,refct; 542 Mat C; 543 PetscOps *Abops; 544 MatOps Aops; 545 546 PetscFunctionBegin; 547 ierr = MatLUFactorSymbolic(A,row,col,f,&C);CHKERRQ(ierr); 548 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 549 550 /* free all the data structures from mat */ 551 ierr = PetscFree(mat->a);CHKERRQ(ierr); 552 if (!mat->singlemalloc) { 553 ierr = PetscFree(mat->i);CHKERRQ(ierr); 554 ierr = PetscFree(mat->j);CHKERRQ(ierr); 555 } 556 if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);} 557 if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);} 558 if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);} 559 if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);} 560 if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);} 561 if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);} 562 ierr = PetscFree(mat);CHKERRQ(ierr); 563 564 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 565 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 566 567 /* 568 This is horrible, horrible code. We need to keep the 569 A pointers for the bops and ops but copy everything 570 else from C. 571 */ 572 Abops = A->bops; 573 Aops = A->ops; 574 refct = A->refct; 575 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 576 mat = (Mat_SeqAIJ*)A->data; 577 PLogObjectParent(A,mat->icol); 578 579 A->bops = Abops; 580 A->ops = Aops; 581 A->qlist = 0; 582 A->refct = refct; 583 /* copy over the type_name and name */ 584 ierr = PetscStrallocpy(C->type_name,&A->type_name);CHKERRQ(ierr); 585 ierr = PetscStrallocpy(C->name,&A->name);CHKERRQ(ierr); 586 587 PetscHeaderDestroy(C); 588 589 PetscFunctionReturn(0); 590 } 591 /* ----------------------------------------------------------- */ 592 #undef __FUNC__ 593 #define __FUNC__ "MatSolve_SeqAIJ" 594 int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 595 { 596 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 597 IS iscol = a->col,isrow = a->row; 598 int *r,*c,ierr,i, n = a->m,*vi,*ai = a->i,*aj = a->j; 599 int nz,shift = a->indexshift,*rout,*cout; 600 Scalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 601 602 PetscFunctionBegin; 603 if (!n) PetscFunctionReturn(0); 604 605 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 606 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 607 tmp = a->solve_work; 608 609 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 610 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 611 612 /* forward solve the lower triangular */ 613 tmp[0] = b[*r++]; 614 tmps = tmp + shift; 615 for (i=1; i<n; i++) { 616 v = aa + ai[i] + shift; 617 vi = aj + ai[i] + shift; 618 nz = a->diag[i] - ai[i]; 619 sum = b[*r++]; 620 while (nz--) sum -= *v++ * tmps[*vi++]; 621 tmp[i] = sum; 622 } 623 624 /* backward solve the upper triangular */ 625 for (i=n-1; i>=0; i--){ 626 v = aa + a->diag[i] + (!shift); 627 vi = aj + a->diag[i] + (!shift); 628 nz = ai[i+1] - a->diag[i] - 1; 629 sum = tmp[i]; 630 while (nz--) sum -= *v++ * tmps[*vi++]; 631 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 632 } 633 634 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 635 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 636 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 637 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 638 PLogFlops(2*a->nz - a->n); 639 PetscFunctionReturn(0); 640 } 641 642 /* ----------------------------------------------------------- */ 643 #undef __FUNC__ 644 #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering" 645 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 646 { 647 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 648 int n = a->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr; 649 Scalar *x,*b,*aa = a->a,sum; 650 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 651 int adiag_i,i,*vi,nz,ai_i; 652 Scalar *v; 653 #endif 654 655 PetscFunctionBegin; 656 if (!n) PetscFunctionReturn(0); 657 if (a->indexshift) { 658 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 659 PetscFunctionReturn(0); 660 } 661 662 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 663 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 664 665 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 666 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 667 #else 668 /* forward solve the lower triangular */ 669 x[0] = b[0]; 670 for (i=1; i<n; i++) { 671 ai_i = ai[i]; 672 v = aa + ai_i; 673 vi = aj + ai_i; 674 nz = adiag[i] - ai_i; 675 sum = b[i]; 676 while (nz--) sum -= *v++ * x[*vi++]; 677 x[i] = sum; 678 } 679 680 /* backward solve the upper triangular */ 681 for (i=n-1; i>=0; i--){ 682 adiag_i = adiag[i]; 683 v = aa + adiag_i + 1; 684 vi = aj + adiag_i + 1; 685 nz = ai[i+1] - adiag_i - 1; 686 sum = x[i]; 687 while (nz--) sum -= *v++ * x[*vi++]; 688 x[i] = sum*aa[adiag_i]; 689 } 690 #endif 691 PLogFlops(2*a->nz - a->n); 692 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 693 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNC__ 698 #define __FUNC__ "MatSolveAdd_SeqAIJ" 699 int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 700 { 701 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 702 IS iscol = a->col,isrow = a->row; 703 int *r,*c,ierr,i, n = a->m,*vi,*ai = a->i,*aj = a->j; 704 int nz,shift = a->indexshift,*rout,*cout; 705 Scalar *x,*b,*tmp,*aa = a->a,sum,*v; 706 707 PetscFunctionBegin; 708 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 709 710 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 711 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 712 tmp = a->solve_work; 713 714 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 715 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 716 717 /* forward solve the lower triangular */ 718 tmp[0] = b[*r++]; 719 for (i=1; i<n; i++) { 720 v = aa + ai[i] + shift; 721 vi = aj + ai[i] + shift; 722 nz = a->diag[i] - ai[i]; 723 sum = b[*r++]; 724 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 725 tmp[i] = sum; 726 } 727 728 /* backward solve the upper triangular */ 729 for (i=n-1; i>=0; i--){ 730 v = aa + a->diag[i] + (!shift); 731 vi = aj + a->diag[i] + (!shift); 732 nz = ai[i+1] - a->diag[i] - 1; 733 sum = tmp[i]; 734 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 735 tmp[i] = sum*aa[a->diag[i]+shift]; 736 x[*c--] += tmp[i]; 737 } 738 739 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 740 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 741 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 742 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 743 PLogFlops(2*a->nz); 744 745 PetscFunctionReturn(0); 746 } 747 /* -------------------------------------------------------------------*/ 748 #undef __FUNC__ 749 #define __FUNC__ "MatSolveTranspose_SeqAIJ" 750 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 751 { 752 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 753 IS iscol = a->col,isrow = a->row; 754 int *r,*c,ierr,i,n = a->m,*vi,*ai = a->i,*aj = a->j; 755 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 756 Scalar *x,*b,*tmp,*aa = a->a,*v,s1; 757 758 PetscFunctionBegin; 759 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 760 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 761 tmp = a->solve_work; 762 763 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 764 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 765 766 /* copy the b into temp work space according to permutation */ 767 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 768 769 /* forward solve the U^T */ 770 for (i=0; i<n; i++) { 771 v = aa + diag[i] + shift; 772 vi = aj + diag[i] + (!shift); 773 nz = ai[i+1] - diag[i] - 1; 774 s1 = tmp[i]; 775 s1 *= *(v++); /* multiply by inverse of diagonal entry */ 776 while (nz--) { 777 tmp[*vi++ + shift] -= (*v++)*s1; 778 } 779 tmp[i] = s1; 780 } 781 782 /* backward solve the L^T */ 783 for (i=n-1; i>=0; i--){ 784 v = aa + diag[i] - 1 + shift; 785 vi = aj + diag[i] - 1 + shift; 786 nz = diag[i] - ai[i]; 787 s1 = tmp[i]; 788 while (nz--) { 789 tmp[*vi-- + shift] -= (*v--)*s1; 790 } 791 } 792 793 /* copy tmp into x according to permutation */ 794 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 795 796 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 797 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 798 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 799 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 800 801 PLogFlops(2*a->nz-a->n); 802 PetscFunctionReturn(0); 803 } 804 805 #undef __FUNC__ 806 #define __FUNC__ "MatSolveTransposeAdd_SeqAIJ" 807 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 808 { 809 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 810 IS iscol = a->col,isrow = a->row; 811 int *r,*c,ierr,i,n = a->m,*vi,*ai = a->i,*aj = a->j; 812 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 813 Scalar *x,*b,*tmp,*aa = a->a,*v; 814 815 PetscFunctionBegin; 816 if (zz != xx) VecCopy(zz,xx); 817 818 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 819 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 820 tmp = a->solve_work; 821 822 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 823 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 824 825 /* copy the b into temp work space according to permutation */ 826 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 827 828 /* forward solve the U^T */ 829 for (i=0; i<n; i++) { 830 v = aa + diag[i] + shift; 831 vi = aj + diag[i] + (!shift); 832 nz = ai[i+1] - diag[i] - 1; 833 tmp[i] *= *v++; 834 while (nz--) { 835 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 836 } 837 } 838 839 /* backward solve the L^T */ 840 for (i=n-1; i>=0; i--){ 841 v = aa + diag[i] - 1 + shift; 842 vi = aj + diag[i] - 1 + shift; 843 nz = diag[i] - ai[i]; 844 while (nz--) { 845 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 846 } 847 } 848 849 /* copy tmp into x according to permutation */ 850 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 851 852 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 853 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 854 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 855 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 856 857 PLogFlops(2*a->nz); 858 PetscFunctionReturn(0); 859 } 860 /* ----------------------------------------------------------------*/ 861 extern int MatMissingDiagonal_SeqAIJ(Mat); 862 863 #undef __FUNC__ 864 #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ" 865 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 866 { 867 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 868 IS isicol; 869 int *r,*ic,ierr,prow,n = a->m,*ai = a->i,*aj = a->j; 870 int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 871 int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0; 872 int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 873 PetscTruth col_identity,row_identity; 874 PetscReal f; 875 876 PetscFunctionBegin; 877 if (info) { 878 f = info->fill; 879 levels = (int)info->levels; 880 diagonal_fill = (int)info->diagonal_fill; 881 } else { 882 f = 1.0; 883 levels = 0; 884 diagonal_fill = 0; 885 } 886 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 887 888 /* special case that simply copies fill pattern */ 889 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 890 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 891 if (!levels && row_identity && col_identity) { 892 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 893 (*fact)->factor = FACTOR_LU; 894 b = (Mat_SeqAIJ*)(*fact)->data; 895 if (!b->diag) { 896 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 897 } 898 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 899 b->row = isrow; 900 b->col = iscol; 901 b->icol = isicol; 902 b->solve_work = (Scalar*)PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 903 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 904 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 905 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 906 PetscFunctionReturn(0); 907 } 908 909 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 910 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 911 912 /* get new row pointers */ 913 ainew = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(ainew); 914 ainew[0] = -shift; 915 /* don't know how many column pointers are needed so estimate */ 916 jmax = (int)(f*(ai[n]+!shift)); 917 ajnew = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajnew); 918 /* ajfill is level of fill for each fill entry */ 919 ajfill = (int*)PetscMalloc((jmax)*sizeof(int));CHKPTRQ(ajfill); 920 /* fill is a linked list of nonzeros in active row */ 921 fill = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(fill); 922 /* im is level for each filled value */ 923 im = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(im); 924 /* dloc is location of diagonal in factor */ 925 dloc = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(dloc); 926 dloc[0] = 0; 927 for (prow=0; prow<n; prow++) { 928 929 /* copy current row into linked list */ 930 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 931 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 932 xi = aj + ai[r[prow]] + shift; 933 fill[n] = n; 934 fill[prow] = -1; /* marker to indicate if diagonal exists */ 935 while (nz--) { 936 fm = n; 937 idx = ic[*xi++ + shift]; 938 do { 939 m = fm; 940 fm = fill[m]; 941 } while (fm < idx); 942 fill[m] = idx; 943 fill[idx] = fm; 944 im[idx] = 0; 945 } 946 947 /* make sure diagonal entry is included */ 948 if (diagonal_fill && fill[prow] == -1) { 949 fm = n; 950 while (fill[fm] < prow) fm = fill[fm]; 951 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 952 fill[fm] = prow; 953 im[prow] = 0; 954 nzf++; 955 dcount++; 956 } 957 958 nzi = 0; 959 row = fill[n]; 960 while (row < prow) { 961 incrlev = im[row] + 1; 962 nz = dloc[row]; 963 xi = ajnew + ainew[row] + shift + nz + 1; 964 flev = ajfill + ainew[row] + shift + nz + 1; 965 nnz = ainew[row+1] - ainew[row] - nz - 1; 966 fm = row; 967 while (nnz-- > 0) { 968 idx = *xi++ + shift; 969 if (*flev + incrlev > levels) { 970 flev++; 971 continue; 972 } 973 do { 974 m = fm; 975 fm = fill[m]; 976 } while (fm < idx); 977 if (fm != idx) { 978 im[idx] = *flev + incrlev; 979 fill[m] = idx; 980 fill[idx] = fm; 981 fm = idx; 982 nzf++; 983 } else { 984 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 985 } 986 flev++; 987 } 988 row = fill[row]; 989 nzi++; 990 } 991 /* copy new filled row into permanent storage */ 992 ainew[prow+1] = ainew[prow] + nzf; 993 if (ainew[prow+1] > jmax-shift) { 994 995 /* estimate how much additional space we will need */ 996 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 997 /* just double the memory each time */ 998 /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 999 int maxadd = jmax; 1000 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 1001 jmax += maxadd; 1002 1003 /* allocate a longer ajnew and ajfill */ 1004 xi = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(xi); 1005 ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1006 ierr = PetscFree(ajnew);CHKERRQ(ierr); 1007 ajnew = xi; 1008 xi = (int*)PetscMalloc(jmax*sizeof(int));CHKPTRQ(xi); 1009 ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 1010 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1011 ajfill = xi; 1012 realloc++; /* count how many times we realloc */ 1013 } 1014 xi = ajnew + ainew[prow] + shift; 1015 flev = ajfill + ainew[prow] + shift; 1016 dloc[prow] = nzi; 1017 fm = fill[n]; 1018 while (nzf--) { 1019 *xi++ = fm - shift; 1020 *flev++ = im[fm]; 1021 fm = fill[fm]; 1022 } 1023 /* make sure row has diagonal entry */ 1024 if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 1025 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 1026 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 1027 } 1028 } 1029 ierr = PetscFree(ajfill);CHKERRQ(ierr); 1030 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1031 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1032 ierr = PetscFree(fill);CHKERRQ(ierr); 1033 ierr = PetscFree(im);CHKERRQ(ierr); 1034 1035 { 1036 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 1037 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 1038 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 1039 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 1040 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1041 if (diagonal_fill) { 1042 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount); 1043 } 1044 } 1045 1046 /* put together the new matrix */ 1047 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 1048 PLogObjectParent(*fact,isicol); 1049 b = (Mat_SeqAIJ*)(*fact)->data; 1050 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1051 b->singlemalloc = PETSC_FALSE; 1052 len = (ainew[n] + shift)*sizeof(Scalar); 1053 /* the next line frees the default space generated by the Create() */ 1054 ierr = PetscFree(b->a);CHKERRQ(ierr); 1055 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1056 b->a = (Scalar*)PetscMalloc(len+1);CHKPTRQ(b->a); 1057 b->j = ajnew; 1058 b->i = ainew; 1059 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1060 b->diag = dloc; 1061 b->ilen = 0; 1062 b->imax = 0; 1063 b->row = isrow; 1064 b->col = iscol; 1065 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1066 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1067 b->icol = isicol; 1068 b->solve_work = (Scalar*)PetscMalloc((n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 1069 /* In b structure: Free imax, ilen, old a, old j. 1070 Allocate dloc, solve_work, new a, new j */ 1071 PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 1072 b->maxnz = b->nz = ainew[n] + shift; 1073 (*fact)->factor = FACTOR_LU; 1074 1075 (*fact)->info.factor_mallocs = realloc; 1076 (*fact)->info.fill_ratio_given = f; 1077 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1078 (*fact)->factor = FACTOR_LU; 1079 1080 PetscFunctionReturn(0); 1081 } 1082 1083 1084 1085 1086