1 /*$Id: aijfact.c,v 1.167 2001/09/11 16:32:26 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 #include "src/inline/spops.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 10 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 11 { 12 PetscFunctionBegin; 13 14 SETERRQ(PETSC_ERR_SUP,"Code not written"); 15 #if !defined(PETSC_USE_DEBUG) 16 PetscFunctionReturn(0); 17 #endif 18 } 19 20 21 EXTERN int MatMarkDiagonal_SeqAIJ(Mat); 22 EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth); 23 24 EXTERN int SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*); 25 EXTERN int SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*); 26 EXTERN int SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*); 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ishift = 0, for indices start at 1 50 ishift = 1, for indices starting at 0 51 ------------------------------------------------------------ 52 */ 53 int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact) 54 { 55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 56 PetscFunctionBegin; 57 SETERRQ(1,"This distribution does not include GNU Copyright code\n\ 58 You can obtain the drop tolerance routines by installing PETSc from\n\ 59 www.mcs.anl.gov/petsc\n"); 60 #else 61 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 62 IS iscolf,isicol,isirow; 63 PetscTruth reorder; 64 int *c,*r,*ic,ierr,i,n = A->m; 65 int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 66 int *ordcol,*iwk,*iperm,*jw; 67 int ishift = !a->indexshift; 68 int jmax,lfill,job,*o_i,*o_j; 69 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 70 PetscReal permtol,af; 71 72 PetscFunctionBegin; 73 74 if (info->dt == PETSC_DEFAULT) info->dt = .005; 75 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax); 76 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 77 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 78 lfill = (int)(info->dtcount/2.0); 79 jmax = (int)(info->fill*a->nz); 80 permtol = info->dtcol; 81 82 83 /* ------------------------------------------------------------ 84 If reorder=.TRUE., then the original matrix has to be 85 reordered to reflect the user selected ordering scheme, and 86 then de-reordered so it is in it's original format. 87 Because Saad's dperm() is NOT in place, we have to copy 88 the original matrix and allocate more storage. . . 89 ------------------------------------------------------------ 90 */ 91 92 /* set reorder to true if either isrow or iscol is not identity */ 93 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 94 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 95 reorder = PetscNot(reorder); 96 97 98 /* storage for ilu factor */ 99 ierr = PetscMalloc((n+1)*sizeof(int),&new_i);CHKERRQ(ierr); 100 ierr = PetscMalloc(jmax*sizeof(int),&new_j);CHKERRQ(ierr); 101 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 102 ierr = PetscMalloc(n*sizeof(int),&ordcol);CHKERRQ(ierr); 103 104 /* ------------------------------------------------------------ 105 Make sure that everything is Fortran formatted (1-Based) 106 ------------------------------------------------------------ 107 */ 108 if (ishift) { 109 for (i=old_i[0];i<old_i[n];i++) { 110 old_j[i]++; 111 } 112 for(i=0;i<n+1;i++) { 113 old_i[i]++; 114 }; 115 }; 116 117 if (reorder) { 118 ierr = ISGetIndices(iscol,&c); CHKERRQ(ierr); 119 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 120 for(i=0;i<n;i++) { 121 r[i] = r[i]+1; 122 c[i] = c[i]+1; 123 } 124 ierr = PetscMalloc((n+1)*sizeof(int),&old_i2);CHKERRQ(ierr); 125 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);CHKERRQ(ierr); 126 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 127 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 128 for (i=0;i<n;i++) { 129 r[i] = r[i]-1; 130 c[i] = c[i]-1; 131 } 132 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 133 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 134 o_a = old_a2; 135 o_j = old_j2; 136 o_i = old_i2; 137 } else { 138 o_a = old_a; 139 o_j = old_j; 140 o_i = old_i; 141 } 142 143 /* ------------------------------------------------------------ 144 Call Saad's ilutp() routine to generate the factorization 145 ------------------------------------------------------------ 146 */ 147 148 ierr = PetscMalloc(2*n*sizeof(int),&iperm);CHKERRQ(ierr); 149 ierr = PetscMalloc(2*n*sizeof(int),&jw);CHKERRQ(ierr); 150 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 151 152 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr); 153 if (ierr) { 154 switch (ierr) { 155 case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 156 case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax); 157 case -5: SETERRQ(1,"ilutp(), zero row encountered"); 158 case -1: SETERRQ(1,"ilutp(), input matrix may be wrong"); 159 case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax); 160 default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr); 161 } 162 } 163 164 ierr = PetscFree(w);CHKERRQ(ierr); 165 ierr = PetscFree(jw);CHKERRQ(ierr); 166 167 /* ------------------------------------------------------------ 168 Saad's routine gives the result in Modified Sparse Row (msr) 169 Convert to Compressed Sparse Row format (csr) 170 ------------------------------------------------------------ 171 */ 172 173 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 174 ierr = PetscMalloc((n+1)*sizeof(int),&iwk);CHKERRQ(ierr); 175 176 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 177 178 ierr = PetscFree(iwk);CHKERRQ(ierr); 179 ierr = PetscFree(wk);CHKERRQ(ierr); 180 181 if (reorder) { 182 ierr = PetscFree(old_a2);CHKERRQ(ierr); 183 ierr = PetscFree(old_j2);CHKERRQ(ierr); 184 ierr = PetscFree(old_i2);CHKERRQ(ierr); 185 } else { 186 /* fix permutation of old_j that the factorization introduced */ 187 for (i=old_i[0]; i<old_i[n]; i++) { 188 old_j[i-1] = iperm[old_j[i-1]-1]; 189 } 190 } 191 192 /* get rid of the shift to indices starting at 1 */ 193 if (ishift) { 194 for (i=0; i<n+1; i++) { 195 old_i[i]--; 196 } 197 for (i=old_i[0];i<old_i[n];i++) { 198 old_j[i]--; 199 } 200 } 201 202 /* Make the factored matrix 0-based */ 203 if (ishift) { 204 for (i=0; i<n+1; i++) { 205 new_i[i]--; 206 } 207 for (i=new_i[0];i<new_i[n];i++) { 208 new_j[i]--; 209 } 210 } 211 212 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 213 /*-- permute the right-hand-side and solution vectors --*/ 214 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 215 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 216 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 217 for(i=0; i<n; i++) { 218 ordcol[i] = ic[iperm[i]-1]; 219 }; 220 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 221 ierr = ISDestroy(isicol);CHKERRQ(ierr); 222 223 ierr = PetscFree(iperm);CHKERRQ(ierr); 224 225 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 226 ierr = PetscFree(ordcol);CHKERRQ(ierr); 227 228 /*----- put together the new matrix -----*/ 229 230 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 231 (*fact)->factor = FACTOR_LU; 232 (*fact)->assembled = PETSC_TRUE; 233 234 b = (Mat_SeqAIJ*)(*fact)->data; 235 ierr = PetscFree(b->imax);CHKERRQ(ierr); 236 b->sorted = PETSC_FALSE; 237 b->singlemalloc = PETSC_FALSE; 238 /* the next line frees the default space generated by the MatCreate() */ 239 ierr = PetscFree(b->a);CHKERRQ(ierr); 240 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 241 b->a = new_a; 242 b->j = new_j; 243 b->i = new_i; 244 b->ilen = 0; 245 b->imax = 0; 246 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 247 b->row = isirow; 248 b->col = iscolf; 249 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 250 b->maxnz = b->nz = new_i[n]; 251 b->indexshift = a->indexshift; 252 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 253 (*fact)->info.factor_mallocs = 0; 254 255 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 256 257 /* check out for identical nodes. If found, use inode functions */ 258 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 259 260 af = ((double)b->nz)/((double)a->nz) + .001; 261 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 262 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 263 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 264 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 265 266 PetscFunctionReturn(0); 267 #endif 268 } 269 270 /* 271 Factorization code for AIJ format. 272 */ 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 275 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatLUInfo *info,Mat *B) 276 { 277 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 278 IS isicol; 279 int *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j; 280 int *ainew,*ajnew,jmax,*fill,*ajtmp,nz,shift = a->indexshift; 281 int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im; 282 PetscReal f; 283 284 PetscFunctionBegin; 285 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 286 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 287 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 288 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 289 290 /* get new row pointers */ 291 ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 292 ainew[0] = -shift; 293 /* don't know how many column pointers are needed so estimate */ 294 f = info->fill; 295 jmax = (int)(f*ai[n]+(!shift)); 296 ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 297 /* fill is a linked list of nonzeros in active row */ 298 ierr = PetscMalloc((2*n+1)*sizeof(int),&fill);CHKERRQ(ierr); 299 im = fill + n + 1; 300 /* idnew is location of diagonal in factor */ 301 ierr = PetscMalloc((n+1)*sizeof(int),&idnew);CHKERRQ(ierr); 302 idnew[0] = -shift; 303 304 for (i=0; i<n; i++) { 305 /* first copy previous fill into linked list */ 306 nnz = nz = ai[r[i]+1] - ai[r[i]]; 307 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 308 ajtmp = aj + ai[r[i]] + shift; 309 fill[n] = n; 310 while (nz--) { 311 fm = n; 312 idx = ic[*ajtmp++ + shift]; 313 do { 314 m = fm; 315 fm = fill[m]; 316 } while (fm < idx); 317 fill[m] = idx; 318 fill[idx] = fm; 319 } 320 row = fill[n]; 321 while (row < i) { 322 ajtmp = ajnew + idnew[row] + (!shift); 323 nzbd = 1 + idnew[row] - ainew[row]; 324 nz = im[row] - nzbd; 325 fm = row; 326 while (nz-- > 0) { 327 idx = *ajtmp++ + shift; 328 nzbd++; 329 if (idx == i) im[row] = nzbd; 330 do { 331 m = fm; 332 fm = fill[m]; 333 } while (fm < idx); 334 if (fm != idx) { 335 fill[m] = idx; 336 fill[idx] = fm; 337 fm = idx; 338 nnz++; 339 } 340 } 341 row = fill[row]; 342 } 343 /* copy new filled row into permanent storage */ 344 ainew[i+1] = ainew[i] + nnz; 345 if (ainew[i+1] > jmax) { 346 347 /* estimate how much additional space we will need */ 348 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 349 /* just double the memory each time */ 350 int maxadd = jmax; 351 /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */ 352 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 353 jmax += maxadd; 354 355 /* allocate a longer ajnew */ 356 ierr = PetscMalloc(jmax*sizeof(int),&ajtmp);CHKERRQ(ierr); 357 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 358 ierr = PetscFree(ajnew);CHKERRQ(ierr); 359 ajnew = ajtmp; 360 realloc++; /* count how many times we realloc */ 361 } 362 ajtmp = ajnew + ainew[i] + shift; 363 fm = fill[n]; 364 nzi = 0; 365 im[i] = nnz; 366 while (nnz--) { 367 if (fm < i) nzi++; 368 *ajtmp++ = fm - shift; 369 fm = fill[fm]; 370 } 371 idnew[i] = ainew[i] + nzi; 372 } 373 if (ai[n] != 0) { 374 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 375 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 376 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 377 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 378 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 379 } else { 380 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 381 } 382 383 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 384 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 385 386 ierr = PetscFree(fill);CHKERRQ(ierr); 387 388 /* put together the new matrix */ 389 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 390 PetscLogObjectParent(*B,isicol); 391 b = (Mat_SeqAIJ*)(*B)->data; 392 ierr = PetscFree(b->imax);CHKERRQ(ierr); 393 b->singlemalloc = PETSC_FALSE; 394 /* the next line frees the default space generated by the Create() */ 395 ierr = PetscFree(b->a);CHKERRQ(ierr); 396 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 397 ierr = PetscMalloc((ainew[n]+shift+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 398 b->j = ajnew; 399 b->i = ainew; 400 b->diag = idnew; 401 b->ilen = 0; 402 b->imax = 0; 403 b->row = isrow; 404 b->col = iscol; 405 b->lu_damping = info->damping; 406 b->lu_zeropivot = info->zeropivot; 407 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 408 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 409 b->icol = isicol; 410 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 411 /* In b structure: Free imax, ilen, old a, old j. 412 Allocate idnew, solve_work, new a, new j */ 413 PetscLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(PetscScalar))); 414 b->maxnz = b->nz = ainew[n] + shift; 415 416 (*B)->factor = FACTOR_LU; 417 (*B)->info.factor_mallocs = realloc; 418 (*B)->info.fill_ratio_given = f; 419 ierr = Mat_AIJ_CheckInode(*B,PETSC_FALSE);CHKERRQ(ierr); 420 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 421 422 if (ai[n] != 0) { 423 (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 424 } else { 425 (*B)->info.fill_ratio_needed = 0.0; 426 } 427 PetscFunctionReturn(0); 428 } 429 /* ----------------------------------------------------------- */ 430 EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth); 431 432 #undef __FUNCT__ 433 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 434 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 435 { 436 Mat C = *B; 437 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data; 438 IS isrow = b->row,isicol = b->icol; 439 int *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j; 440 int *ajtmpold,*ajtmp,nz,row,*ics,shift = a->indexshift; 441 int *diag_offset = b->diag,diag; 442 int *pj,ndamp = 0; 443 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 444 PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs; 445 PetscTruth damp; 446 447 PetscFunctionBegin; 448 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 449 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 450 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 451 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 452 rtmps = rtmp + shift; ics = ic + shift; 453 454 do { 455 damp = PETSC_FALSE; 456 for (i=0; i<n; i++) { 457 nz = ai[i+1] - ai[i]; 458 ajtmp = aj + ai[i] + shift; 459 for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0; 460 461 /* load in initial (unfactored row) */ 462 nz = a->i[r[i]+1] - a->i[r[i]]; 463 ajtmpold = a->j + a->i[r[i]] + shift; 464 v = a->a + a->i[r[i]] + shift; 465 for (j=0; j<nz; j++) { 466 rtmp[ics[ajtmpold[j]]] = v[j]; 467 if (ndamp && ajtmpold[j] == r[i]) { /* damp the diagonal of the matrix */ 468 rtmp[ics[ajtmpold[j]]] += damping; 469 } 470 } 471 472 row = *ajtmp++ + shift; 473 while (row < i) { 474 pc = rtmp + row; 475 if (*pc != 0.0) { 476 pv = b->a + diag_offset[row] + shift; 477 pj = b->j + diag_offset[row] + (!shift); 478 multiplier = *pc / *pv++; 479 *pc = multiplier; 480 nz = ai[row+1] - diag_offset[row] - 1; 481 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 482 PetscLogFlops(2*nz); 483 } 484 row = *ajtmp++ + shift; 485 } 486 /* finished row so stick it into b->a */ 487 pv = b->a + ai[i] + shift; 488 pj = b->j + ai[i] + shift; 489 nz = ai[i+1] - ai[i]; 490 diag = diag_offset[i] - ai[i]; 491 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 492 rs = 0.0; 493 for (j=0; j<nz; j++) { 494 pv[j] = rtmps[pj[j]]; 495 if (j != diag) rs += PetscAbsScalar(pv[j]); 496 } 497 if (PetscAbsScalar(pv[diag]) < zeropivot*rs) { 498 if (damping) { 499 if (ndamp) damping *= 2.0; 500 damp = PETSC_TRUE; 501 ndamp++; 502 break; 503 } else { 504 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs); 505 } 506 } 507 } 508 } while (damp); 509 510 /* invert diagonal entries for simplier triangular solves */ 511 for (i=0; i<n; i++) { 512 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 513 } 514 515 ierr = PetscFree(rtmp);CHKERRQ(ierr); 516 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 517 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 518 C->factor = FACTOR_LU; 519 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 520 C->assembled = PETSC_TRUE; 521 PetscLogFlops(C->n); 522 if (ndamp) { 523 PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %g\n",ndamp,damping); 524 } 525 PetscFunctionReturn(0); 526 } 527 /* ----------------------------------------------------------- */ 528 #undef __FUNCT__ 529 #define __FUNCT__ "MatLUFactor_SeqAIJ" 530 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatLUInfo *info) 531 { 532 int ierr; 533 Mat C; 534 535 PetscFunctionBegin; 536 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 537 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 538 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 539 PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol); 540 PetscFunctionReturn(0); 541 } 542 /* ----------------------------------------------------------- */ 543 #undef __FUNCT__ 544 #define __FUNCT__ "MatSolve_SeqAIJ" 545 int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 546 { 547 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 548 IS iscol = a->col,isrow = a->row; 549 int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 550 int nz,shift = a->indexshift,*rout,*cout; 551 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 552 553 PetscFunctionBegin; 554 if (!n) PetscFunctionReturn(0); 555 556 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 557 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 558 tmp = a->solve_work; 559 560 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 561 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 562 563 /* forward solve the lower triangular */ 564 tmp[0] = b[*r++]; 565 tmps = tmp + shift; 566 for (i=1; i<n; i++) { 567 v = aa + ai[i] + shift; 568 vi = aj + ai[i] + shift; 569 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 570 tmp[i] = sum; 571 } 572 573 /* backward solve the upper triangular */ 574 for (i=n-1; i>=0; i--){ 575 v = aa + a->diag[i] + (!shift); 576 vi = aj + a->diag[i] + (!shift); 577 nz = ai[i+1] - a->diag[i] - 1; 578 sum = tmp[i]; 579 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 580 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 581 } 582 583 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 584 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 585 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 586 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 587 PetscLogFlops(2*a->nz - A->n); 588 PetscFunctionReturn(0); 589 } 590 591 /* ----------------------------------------------------------- */ 592 #undef __FUNCT__ 593 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 594 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 595 { 596 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 597 int n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr; 598 PetscScalar *x,*b,*aa = a->a; 599 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 600 int adiag_i,i,*vi,nz,ai_i; 601 PetscScalar *v,sum; 602 #endif 603 604 PetscFunctionBegin; 605 if (!n) PetscFunctionReturn(0); 606 if (a->indexshift) { 607 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 608 PetscFunctionReturn(0); 609 } 610 611 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 612 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 613 614 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 615 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 616 #else 617 /* forward solve the lower triangular */ 618 x[0] = b[0]; 619 for (i=1; i<n; i++) { 620 ai_i = ai[i]; 621 v = aa + ai_i; 622 vi = aj + ai_i; 623 nz = adiag[i] - ai_i; 624 sum = b[i]; 625 while (nz--) sum -= *v++ * x[*vi++]; 626 x[i] = sum; 627 } 628 629 /* backward solve the upper triangular */ 630 for (i=n-1; i>=0; i--){ 631 adiag_i = adiag[i]; 632 v = aa + adiag_i + 1; 633 vi = aj + adiag_i + 1; 634 nz = ai[i+1] - adiag_i - 1; 635 sum = x[i]; 636 while (nz--) sum -= *v++ * x[*vi++]; 637 x[i] = sum*aa[adiag_i]; 638 } 639 #endif 640 PetscLogFlops(2*a->nz - A->n); 641 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 642 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 643 PetscFunctionReturn(0); 644 } 645 646 #undef __FUNCT__ 647 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 648 int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 649 { 650 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 651 IS iscol = a->col,isrow = a->row; 652 int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 653 int nz,shift = a->indexshift,*rout,*cout; 654 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 655 656 PetscFunctionBegin; 657 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 658 659 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 660 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 661 tmp = a->solve_work; 662 663 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 664 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 665 666 /* forward solve the lower triangular */ 667 tmp[0] = b[*r++]; 668 for (i=1; i<n; i++) { 669 v = aa + ai[i] + shift; 670 vi = aj + ai[i] + shift; 671 nz = a->diag[i] - ai[i]; 672 sum = b[*r++]; 673 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 674 tmp[i] = sum; 675 } 676 677 /* backward solve the upper triangular */ 678 for (i=n-1; i>=0; i--){ 679 v = aa + a->diag[i] + (!shift); 680 vi = aj + a->diag[i] + (!shift); 681 nz = ai[i+1] - a->diag[i] - 1; 682 sum = tmp[i]; 683 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 684 tmp[i] = sum*aa[a->diag[i]+shift]; 685 x[*c--] += tmp[i]; 686 } 687 688 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 689 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 690 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 691 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 692 PetscLogFlops(2*a->nz); 693 694 PetscFunctionReturn(0); 695 } 696 /* -------------------------------------------------------------------*/ 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 699 int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 700 { 701 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 702 IS iscol = a->col,isrow = a->row; 703 int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 704 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 705 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 706 707 PetscFunctionBegin; 708 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 709 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 710 tmp = a->solve_work; 711 712 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 713 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 714 715 /* copy the b into temp work space according to permutation */ 716 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 717 718 /* forward solve the U^T */ 719 for (i=0; i<n; i++) { 720 v = aa + diag[i] + shift; 721 vi = aj + diag[i] + (!shift); 722 nz = ai[i+1] - diag[i] - 1; 723 s1 = tmp[i]; 724 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 725 while (nz--) { 726 tmp[*vi++ + shift] -= (*v++)*s1; 727 } 728 tmp[i] = s1; 729 } 730 731 /* backward solve the L^T */ 732 for (i=n-1; i>=0; i--){ 733 v = aa + diag[i] - 1 + shift; 734 vi = aj + diag[i] - 1 + shift; 735 nz = diag[i] - ai[i]; 736 s1 = tmp[i]; 737 while (nz--) { 738 tmp[*vi-- + shift] -= (*v--)*s1; 739 } 740 } 741 742 /* copy tmp into x according to permutation */ 743 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 744 745 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 746 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 747 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 748 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 749 750 PetscLogFlops(2*a->nz-A->n); 751 PetscFunctionReturn(0); 752 } 753 754 #undef __FUNCT__ 755 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 756 int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 757 { 758 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 759 IS iscol = a->col,isrow = a->row; 760 int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 761 int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag; 762 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 763 764 PetscFunctionBegin; 765 if (zz != xx) VecCopy(zz,xx); 766 767 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 768 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 769 tmp = a->solve_work; 770 771 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 772 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 773 774 /* copy the b into temp work space according to permutation */ 775 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 776 777 /* forward solve the U^T */ 778 for (i=0; i<n; i++) { 779 v = aa + diag[i] + shift; 780 vi = aj + diag[i] + (!shift); 781 nz = ai[i+1] - diag[i] - 1; 782 tmp[i] *= *v++; 783 while (nz--) { 784 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 785 } 786 } 787 788 /* backward solve the L^T */ 789 for (i=n-1; i>=0; i--){ 790 v = aa + diag[i] - 1 + shift; 791 vi = aj + diag[i] - 1 + shift; 792 nz = diag[i] - ai[i]; 793 while (nz--) { 794 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 795 } 796 } 797 798 /* copy tmp into x according to permutation */ 799 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 800 801 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 802 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 803 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 804 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 805 806 PetscLogFlops(2*a->nz); 807 PetscFunctionReturn(0); 808 } 809 /* ----------------------------------------------------------------*/ 810 EXTERN int MatMissingDiagonal_SeqAIJ(Mat); 811 812 #undef __FUNCT__ 813 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 814 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 815 { 816 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 817 IS isicol; 818 int *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j; 819 int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev; 820 int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0; 821 int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 822 PetscTruth col_identity,row_identity; 823 PetscReal f; 824 825 PetscFunctionBegin; 826 f = info->fill; 827 levels = (int)info->levels; 828 diagonal_fill = (int)info->diagonal_fill; 829 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 830 831 /* special case that simply copies fill pattern */ 832 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 833 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 834 if (!levels && row_identity && col_identity) { 835 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 836 (*fact)->factor = FACTOR_LU; 837 b = (Mat_SeqAIJ*)(*fact)->data; 838 if (!b->diag) { 839 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 840 } 841 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 842 b->row = isrow; 843 b->col = iscol; 844 b->icol = isicol; 845 b->lu_damping = info->damping; 846 b->lu_zeropivot = info->zeropivot; 847 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 848 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 849 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 850 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 851 PetscFunctionReturn(0); 852 } 853 854 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 855 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 856 857 /* get new row pointers */ 858 ierr = PetscMalloc((n+1)*sizeof(int),&ainew);CHKERRQ(ierr); 859 ainew[0] = -shift; 860 /* don't know how many column pointers are needed so estimate */ 861 jmax = (int)(f*(ai[n]+!shift)); 862 ierr = PetscMalloc((jmax)*sizeof(int),&ajnew);CHKERRQ(ierr); 863 /* ajfill is level of fill for each fill entry */ 864 ierr = PetscMalloc((jmax)*sizeof(int),&ajfill);CHKERRQ(ierr); 865 /* fill is a linked list of nonzeros in active row */ 866 ierr = PetscMalloc((n+1)*sizeof(int),&fill);CHKERRQ(ierr); 867 /* im is level for each filled value */ 868 ierr = PetscMalloc((n+1)*sizeof(int),&im);CHKERRQ(ierr); 869 /* dloc is location of diagonal in factor */ 870 ierr = PetscMalloc((n+1)*sizeof(int),&dloc);CHKERRQ(ierr); 871 dloc[0] = 0; 872 for (prow=0; prow<n; prow++) { 873 874 /* copy current row into linked list */ 875 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 876 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 877 xi = aj + ai[r[prow]] + shift; 878 fill[n] = n; 879 fill[prow] = -1; /* marker to indicate if diagonal exists */ 880 while (nz--) { 881 fm = n; 882 idx = ic[*xi++ + shift]; 883 do { 884 m = fm; 885 fm = fill[m]; 886 } while (fm < idx); 887 fill[m] = idx; 888 fill[idx] = fm; 889 im[idx] = 0; 890 } 891 892 /* make sure diagonal entry is included */ 893 if (diagonal_fill && fill[prow] == -1) { 894 fm = n; 895 while (fill[fm] < prow) fm = fill[fm]; 896 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 897 fill[fm] = prow; 898 im[prow] = 0; 899 nzf++; 900 dcount++; 901 } 902 903 nzi = 0; 904 row = fill[n]; 905 while (row < prow) { 906 incrlev = im[row] + 1; 907 nz = dloc[row]; 908 xi = ajnew + ainew[row] + shift + nz + 1; 909 flev = ajfill + ainew[row] + shift + nz + 1; 910 nnz = ainew[row+1] - ainew[row] - nz - 1; 911 fm = row; 912 while (nnz-- > 0) { 913 idx = *xi++ + shift; 914 if (*flev + incrlev > levels) { 915 flev++; 916 continue; 917 } 918 do { 919 m = fm; 920 fm = fill[m]; 921 } while (fm < idx); 922 if (fm != idx) { 923 im[idx] = *flev + incrlev; 924 fill[m] = idx; 925 fill[idx] = fm; 926 fm = idx; 927 nzf++; 928 } else { 929 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 930 } 931 flev++; 932 } 933 row = fill[row]; 934 nzi++; 935 } 936 /* copy new filled row into permanent storage */ 937 ainew[prow+1] = ainew[prow] + nzf; 938 if (ainew[prow+1] > jmax-shift) { 939 940 /* estimate how much additional space we will need */ 941 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 942 /* just double the memory each time */ 943 /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */ 944 int maxadd = jmax; 945 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 946 jmax += maxadd; 947 948 /* allocate a longer ajnew and ajfill */ 949 ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 950 ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 951 ierr = PetscFree(ajnew);CHKERRQ(ierr); 952 ajnew = xi; 953 ierr = PetscMalloc(jmax*sizeof(int),&xi);CHKERRQ(ierr); 954 ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 955 ierr = PetscFree(ajfill);CHKERRQ(ierr); 956 ajfill = xi; 957 realloc++; /* count how many times we realloc */ 958 } 959 xi = ajnew + ainew[prow] + shift; 960 flev = ajfill + ainew[prow] + shift; 961 dloc[prow] = nzi; 962 fm = fill[n]; 963 while (nzf--) { 964 *xi++ = fm - shift; 965 *flev++ = im[fm]; 966 fm = fill[fm]; 967 } 968 /* make sure row has diagonal entry */ 969 if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 970 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrix\n\ 971 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 972 } 973 } 974 ierr = PetscFree(ajfill);CHKERRQ(ierr); 975 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 976 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 977 ierr = PetscFree(fill);CHKERRQ(ierr); 978 ierr = PetscFree(im);CHKERRQ(ierr); 979 980 { 981 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 982 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 983 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 984 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 985 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 986 if (diagonal_fill) { 987 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount); 988 } 989 } 990 991 /* put together the new matrix */ 992 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 993 PetscLogObjectParent(*fact,isicol); 994 b = (Mat_SeqAIJ*)(*fact)->data; 995 ierr = PetscFree(b->imax);CHKERRQ(ierr); 996 b->singlemalloc = PETSC_FALSE; 997 len = (ainew[n] + shift)*sizeof(PetscScalar); 998 /* the next line frees the default space generated by the Create() */ 999 ierr = PetscFree(b->a);CHKERRQ(ierr); 1000 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1001 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1002 b->j = ajnew; 1003 b->i = ainew; 1004 for (i=0; i<n; i++) dloc[i] += ainew[i]; 1005 b->diag = dloc; 1006 b->ilen = 0; 1007 b->imax = 0; 1008 b->row = isrow; 1009 b->col = iscol; 1010 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1011 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1012 b->icol = isicol; 1013 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1014 /* In b structure: Free imax, ilen, old a, old j. 1015 Allocate dloc, solve_work, new a, new j */ 1016 PetscLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(PetscScalar))); 1017 b->maxnz = b->nz = ainew[n] + shift; 1018 b->lu_damping = info->damping; 1019 b->lu_zeropivot = info->zeropivot; 1020 (*fact)->factor = FACTOR_LU; 1021 ierr = Mat_AIJ_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1022 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1023 1024 (*fact)->info.factor_mallocs = realloc; 1025 (*fact)->info.fill_ratio_given = f; 1026 (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 #include "src/mat/impls/sbaij/seq/sbaij.h" 1031 typedef struct { 1032 int levels; 1033 } Mat_SeqAIJ_SeqSBAIJ; 1034 1035 #undef __FUNCT__ 1036 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1037 int MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact) 1038 { 1039 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1040 int ierr; 1041 Mat_SeqAIJ_SeqSBAIJ *ptr = (Mat_SeqAIJ_SeqSBAIJ*)(*fact)->spptr; 1042 1043 PetscFunctionBegin; 1044 if (!a->sbaijMat){ 1045 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1046 } 1047 ierr = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(a->sbaijMat,fact);CHKERRQ(ierr); 1048 if (ptr->levels > 0){ 1049 ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 1050 } 1051 a->sbaijMat = PETSC_NULL; 1052 ierr = PetscFree(ptr);CHKERRQ(ierr); 1053 1054 PetscFunctionReturn(0); 1055 } 1056 1057 #undef __FUNCT__ 1058 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1059 int MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,PetscReal fill,int levels,Mat *fact) 1060 { 1061 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1062 Mat_SeqSBAIJ *b; 1063 int ierr; 1064 PetscTruth perm_identity; 1065 Mat_SeqAIJ_SeqSBAIJ *ptr; 1066 1067 PetscFunctionBegin; 1068 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1069 if (!perm_identity){ 1070 SETERRQ(1,"Non-identity permutation is not supported yet"); 1071 } 1072 if (!a->sbaijMat){ 1073 ierr = MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);CHKERRQ(ierr); 1074 } 1075 1076 if (levels > 0){ 1077 ierr = MatICCFactorSymbolic(a->sbaijMat,perm,fill,levels,fact);CHKERRQ(ierr); 1078 1079 } else { /* in-place icc(0) */ 1080 (*fact) = a->sbaijMat; 1081 (*fact)->factor = FACTOR_CHOLESKY; 1082 b = (Mat_SeqSBAIJ*)(*fact)->data; 1083 b->row = perm; 1084 b->icol = perm; 1085 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1086 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1087 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1088 } 1089 1090 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1091 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1092 1093 ierr = PetscNew(Mat_SeqAIJ_SeqSBAIJ,&ptr);CHKERRQ(ierr); 1094 (*fact)->spptr = (void*)ptr; 1095 ptr->levels = levels; /* to be used by CholeskyFactorNumeric_SeqAIJ() */ 1096 1097 PetscFunctionReturn(0); 1098 } 1099 1100