1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 23 24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 28 #endif 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 32 /* ------------------------------------------------------------ 33 34 This interface was contribed by Tony Caola 35 36 This routine is an interface to the pivoting drop-tolerance 37 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 38 SPARSEKIT2. 39 40 The SPARSEKIT2 routines used here are covered by the GNU 41 copyright; see the file gnu in this directory. 42 43 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 44 help in getting this routine ironed out. 45 46 The major drawback to this routine is that if info->fill is 47 not large enough it fails rather than allocating more space; 48 this can be fixed by hacking/improving the f2c version of 49 Yousef Saad's code. 50 51 ------------------------------------------------------------ 52 */ 53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 54 { 55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 56 PetscFunctionBegin; 57 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 58 You can obtain the drop tolerance routines by installing PETSc from\n\ 59 www.mcs.anl.gov/petsc\n"); 60 #else 61 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 62 IS iscolf,isicol,isirow; 63 PetscTruth reorder; 64 PetscErrorCode ierr,sierr; 65 PetscInt *c,*r,*ic,i,n = A->rmap.n; 66 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 67 PetscInt *ordcol,*iwk,*iperm,*jw; 68 PetscInt jmax,lfill,job,*o_i,*o_j; 69 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 70 PetscReal af; 71 72 PetscFunctionBegin; 73 74 if (info->dt == PETSC_DEFAULT) info->dt = .005; 75 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 76 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 77 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 78 lfill = (PetscInt)(info->dtcount/2.0); 79 jmax = (PetscInt)(info->fill*a->nz); 80 81 82 /* ------------------------------------------------------------ 83 If reorder=.TRUE., then the original matrix has to be 84 reordered to reflect the user selected ordering scheme, and 85 then de-reordered so it is in it's original format. 86 Because Saad's dperm() is NOT in place, we have to copy 87 the original matrix and allocate more storage. . . 88 ------------------------------------------------------------ 89 */ 90 91 /* set reorder to true if either isrow or iscol is not identity */ 92 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 93 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 94 reorder = PetscNot(reorder); 95 96 97 /* storage for ilu factor */ 98 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 100 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 101 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 102 103 /* ------------------------------------------------------------ 104 Make sure that everything is Fortran formatted (1-Based) 105 ------------------------------------------------------------ 106 */ 107 for (i=old_i[0];i<old_i[n];i++) { 108 old_j[i]++; 109 } 110 for(i=0;i<n+1;i++) { 111 old_i[i]++; 112 }; 113 114 115 if (reorder) { 116 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 117 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 r[i] = r[i]+1; 120 c[i] = c[i]+1; 121 } 122 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 123 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 124 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 125 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 126 for (i=0;i<n;i++) { 127 r[i] = r[i]-1; 128 c[i] = c[i]-1; 129 } 130 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 131 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 132 o_a = old_a2; 133 o_j = old_j2; 134 o_i = old_i2; 135 } else { 136 o_a = old_a; 137 o_j = old_j; 138 o_i = old_i; 139 } 140 141 /* ------------------------------------------------------------ 142 Call Saad's ilutp() routine to generate the factorization 143 ------------------------------------------------------------ 144 */ 145 146 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 147 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 148 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 149 150 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 151 if (sierr) { 152 switch (sierr) { 153 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 154 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 155 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 156 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 157 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 158 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 159 } 160 } 161 162 ierr = PetscFree(w);CHKERRQ(ierr); 163 ierr = PetscFree(jw);CHKERRQ(ierr); 164 165 /* ------------------------------------------------------------ 166 Saad's routine gives the result in Modified Sparse Row (msr) 167 Convert to Compressed Sparse Row format (csr) 168 ------------------------------------------------------------ 169 */ 170 171 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 172 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 173 174 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 175 176 ierr = PetscFree(iwk);CHKERRQ(ierr); 177 ierr = PetscFree(wk);CHKERRQ(ierr); 178 179 if (reorder) { 180 ierr = PetscFree(old_a2);CHKERRQ(ierr); 181 ierr = PetscFree(old_j2);CHKERRQ(ierr); 182 ierr = PetscFree(old_i2);CHKERRQ(ierr); 183 } else { 184 /* fix permutation of old_j that the factorization introduced */ 185 for (i=old_i[0]; i<old_i[n]; i++) { 186 old_j[i-1] = iperm[old_j[i-1]-1]; 187 } 188 } 189 190 /* get rid of the shift to indices starting at 1 */ 191 for (i=0; i<n+1; i++) { 192 old_i[i]--; 193 } 194 for (i=old_i[0];i<old_i[n];i++) { 195 old_j[i]--; 196 } 197 198 /* Make the factored matrix 0-based */ 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 206 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 207 /*-- permute the right-hand-side and solution vectors --*/ 208 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 209 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 210 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 211 for(i=0; i<n; i++) { 212 ordcol[i] = ic[iperm[i]-1]; 213 }; 214 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 215 ierr = ISDestroy(isicol);CHKERRQ(ierr); 216 217 ierr = PetscFree(iperm);CHKERRQ(ierr); 218 219 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 220 ierr = PetscFree(ordcol);CHKERRQ(ierr); 221 222 /*----- put together the new matrix -----*/ 223 224 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 225 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 226 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 227 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 228 (*fact)->factor = FACTOR_LU; 229 (*fact)->assembled = PETSC_TRUE; 230 231 b = (Mat_SeqAIJ*)(*fact)->data; 232 b->freedata = PETSC_TRUE; 233 b->sorted = PETSC_FALSE; 234 b->singlemalloc = PETSC_FALSE; 235 b->a = new_a; 236 b->j = new_j; 237 b->i = new_i; 238 b->ilen = 0; 239 b->imax = 0; 240 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 241 b->row = isirow; 242 b->col = iscolf; 243 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 244 b->maxnz = b->nz = new_i[n]; 245 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 246 (*fact)->info.factor_mallocs = 0; 247 248 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 249 250 af = ((double)b->nz)/((double)a->nz) + .001; 251 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 252 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 253 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 254 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 255 256 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 257 258 PetscFunctionReturn(0); 259 #endif 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 265 { 266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 267 IS isicol; 268 PetscErrorCode ierr; 269 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 270 PetscInt *bi,*bj,*ajtmp; 271 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 272 PetscReal f; 273 PetscInt nlnk,*lnk,k,**bi_ptr; 274 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 275 PetscBT lnkbt; 276 277 PetscFunctionBegin; 278 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 279 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 280 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 281 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 282 283 /* get new row pointers */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 285 bi[0] = 0; 286 287 /* bdiag is location of diagonal in factor */ 288 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 289 bdiag[0] = 0; 290 291 /* linked list for storing column indices of the active row */ 292 nlnk = n + 1; 293 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 294 295 ierr = PetscMalloc((n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&im);CHKERRQ(ierr); 296 bi_ptr = (PetscInt**)(im + n); 297 298 /* initial FreeSpace size is f*(ai[n]+1) */ 299 f = info->fill; 300 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 301 current_space = free_space; 302 303 for (i=0; i<n; i++) { 304 /* copy previous fill into linked list */ 305 nzi = 0; 306 nnz = ai[r[i]+1] - ai[r[i]]; 307 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 308 ajtmp = aj + ai[r[i]]; 309 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 310 nzi += nlnk; 311 312 /* add pivot rows into linked list */ 313 row = lnk[n]; 314 while (row < i) { 315 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 316 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 317 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 318 nzi += nlnk; 319 row = lnk[row]; 320 } 321 bi[i+1] = bi[i] + nzi; 322 im[i] = nzi; 323 324 /* mark bdiag */ 325 nzbd = 0; 326 nnz = nzi; 327 k = lnk[n]; 328 while (nnz-- && k < i){ 329 nzbd++; 330 k = lnk[k]; 331 } 332 bdiag[i] = bi[i] + nzbd; 333 334 /* if free space is not available, make more free space */ 335 if (current_space->local_remaining<nzi) { 336 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 337 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 338 reallocs++; 339 } 340 341 /* copy data into free space, then initialize lnk */ 342 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 343 bi_ptr[i] = current_space->array; 344 current_space->array += nzi; 345 current_space->local_used += nzi; 346 current_space->local_remaining -= nzi; 347 } 348 #if defined(PETSC_USE_INFO) 349 if (ai[n] != 0) { 350 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 351 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 352 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 353 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 354 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 355 } else { 356 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 357 } 358 #endif 359 360 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 361 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 362 363 /* destroy list of free space and other temporary array(s) */ 364 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 365 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 366 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 367 ierr = PetscFree(im);CHKERRQ(ierr); 368 369 /* put together the new matrix */ 370 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 371 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 372 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 373 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 374 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 375 b = (Mat_SeqAIJ*)(*B)->data; 376 b->freedata = PETSC_TRUE; 377 b->singlemalloc = PETSC_FALSE; 378 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 379 b->j = bj; 380 b->i = bi; 381 b->diag = bdiag; 382 b->ilen = 0; 383 b->imax = 0; 384 b->row = isrow; 385 b->col = iscol; 386 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 387 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 388 b->icol = isicol; 389 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 390 391 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 392 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 393 b->maxnz = b->nz = bi[n] ; 394 395 (*B)->factor = FACTOR_LU; 396 (*B)->info.factor_mallocs = reallocs; 397 (*B)->info.fill_ratio_given = f; 398 399 if (ai[n] != 0) { 400 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 401 } else { 402 (*B)->info.fill_ratio_needed = 0.0; 403 } 404 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 405 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 406 PetscFunctionReturn(0); 407 } 408 409 /* ----------------------------------------------------------- */ 410 #undef __FUNCT__ 411 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 412 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 413 { 414 Mat C=*B; 415 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 416 IS isrow = b->row,isicol = b->icol; 417 PetscErrorCode ierr; 418 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 419 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 420 PetscInt *diag_offset = b->diag,diag,*pj; 421 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps,*aa=a->a; 422 PetscScalar d; 423 PetscReal rs; 424 LUShift_Ctx sctx; 425 PetscInt newshift,*ddiag; 426 427 PetscFunctionBegin; 428 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 429 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 430 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 431 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 432 rtmps = rtmp; ics = ic; 433 434 sctx.shift_top = 0; 435 sctx.nshift_max = 0; 436 sctx.shift_lo = 0; 437 sctx.shift_hi = 0; 438 439 if (!a->diag) { 440 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 441 } 442 /* if both shift schemes are chosen by user, only use info->shiftpd */ 443 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 444 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 445 PetscInt *aai = a->i; 446 ddiag = a->diag; 447 sctx.shift_top = 0; 448 for (i=0; i<n; i++) { 449 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 450 d = (a->a)[ddiag[i]]; 451 rs = -PetscAbsScalar(d) - PetscRealPart(d); 452 v = a->a+aai[i]; 453 nz = aai[i+1] - aai[i]; 454 for (j=0; j<nz; j++) 455 rs += PetscAbsScalar(v[j]); 456 if (rs>sctx.shift_top) sctx.shift_top = rs; 457 } 458 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 459 sctx.shift_top *= 1.1; 460 sctx.nshift_max = 5; 461 sctx.shift_lo = 0.; 462 sctx.shift_hi = 1.; 463 } 464 465 sctx.shift_amount = 0; 466 sctx.nshift = 0; 467 do { 468 sctx.lushift = PETSC_FALSE; 469 for (i=0; i<n; i++){ 470 nz = bi[i+1] - bi[i]; 471 bjtmp = bj + bi[i]; 472 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 473 474 /* load in initial (unfactored row) */ 475 nz = a->i[r[i]+1] - a->i[r[i]]; 476 ajtmp = a->j + a->i[r[i]]; 477 v = a->a + a->i[r[i]]; 478 for (j=0; j<nz; j++) { 479 rtmp[ics[ajtmp[j]]] = v[j]; 480 } 481 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 482 483 row = *bjtmp++; 484 while (row < i) { 485 pc = rtmp + row; 486 if (*pc != 0.0) { 487 pv = b->a + diag_offset[row]; 488 pj = b->j + diag_offset[row] + 1; 489 multiplier = *pc / *pv++; 490 *pc = multiplier; 491 nz = bi[row+1] - diag_offset[row] - 1; 492 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 493 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 494 } 495 row = *bjtmp++; 496 } 497 /* finished row so stick it into b->a */ 498 pv = b->a + bi[i] ; 499 pj = b->j + bi[i] ; 500 nz = bi[i+1] - bi[i]; 501 diag = diag_offset[i] - bi[i]; 502 rs = 0.0; 503 for (j=0; j<nz; j++) { 504 pv[j] = rtmps[pj[j]]; 505 if (j != diag) rs += PetscAbsScalar(pv[j]); 506 } 507 508 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 509 sctx.rs = rs; 510 sctx.pv = pv[diag]; 511 ierr = MatLUCheckShift_inline(info,sctx,i,aa,a->diag,newshift);CHKERRQ(ierr); 512 if (newshift == 1){ 513 break; /* sctx.shift_amount is updated */ 514 } else if (newshift == -1){ 515 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %G tolerance %G * rs %G",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 516 } 517 } 518 519 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 520 /* 521 * if no shift in this attempt & shifting & started shifting & can refine, 522 * then try lower shift 523 */ 524 sctx.shift_hi = info->shift_fraction; 525 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 526 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 527 sctx.lushift = PETSC_TRUE; 528 sctx.nshift++; 529 } 530 } while (sctx.lushift); 531 532 /* invert diagonal entries for simplier triangular solves */ 533 for (i=0; i<n; i++) { 534 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 535 } 536 537 ierr = PetscFree(rtmp);CHKERRQ(ierr); 538 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 539 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 540 C->factor = FACTOR_LU; 541 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 542 C->assembled = PETSC_TRUE; 543 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 544 if (sctx.nshift){ 545 if (info->shiftnz) { 546 ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 547 } else if (info->shiftpd) { 548 ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 549 } 550 } 551 PetscFunctionReturn(0); 552 } 553 554 #undef __FUNCT__ 555 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 556 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 557 { 558 PetscFunctionBegin; 559 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 560 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 561 PetscFunctionReturn(0); 562 } 563 564 565 /* ----------------------------------------------------------- */ 566 #undef __FUNCT__ 567 #define __FUNCT__ "MatLUFactor_SeqAIJ" 568 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 569 { 570 PetscErrorCode ierr; 571 Mat C; 572 573 PetscFunctionBegin; 574 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 575 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 576 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 577 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 578 PetscFunctionReturn(0); 579 } 580 /* ----------------------------------------------------------- */ 581 #undef __FUNCT__ 582 #define __FUNCT__ "MatSolve_SeqAIJ" 583 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 584 { 585 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 586 IS iscol = a->col,isrow = a->row; 587 PetscErrorCode ierr; 588 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 589 PetscInt nz,*rout,*cout; 590 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 591 592 PetscFunctionBegin; 593 if (!n) PetscFunctionReturn(0); 594 595 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 596 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 597 tmp = a->solve_work; 598 599 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 600 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 601 602 /* forward solve the lower triangular */ 603 tmp[0] = b[*r++]; 604 tmps = tmp; 605 for (i=1; i<n; i++) { 606 v = aa + ai[i] ; 607 vi = aj + ai[i] ; 608 nz = a->diag[i] - ai[i]; 609 sum = b[*r++]; 610 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 611 tmp[i] = sum; 612 } 613 614 /* backward solve the upper triangular */ 615 for (i=n-1; i>=0; i--){ 616 v = aa + a->diag[i] + 1; 617 vi = aj + a->diag[i] + 1; 618 nz = ai[i+1] - a->diag[i] - 1; 619 sum = tmp[i]; 620 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 621 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 622 } 623 624 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 625 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 626 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 627 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 628 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 629 PetscFunctionReturn(0); 630 } 631 632 #undef __FUNCT__ 633 #define __FUNCT__ "MatMatSolve_SeqAIJ" 634 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 635 { 636 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 637 IS iscol = a->col,isrow = a->row; 638 PetscErrorCode ierr; 639 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 640 PetscInt nz,*rout,*cout,neq; 641 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 642 643 PetscFunctionBegin; 644 if (!n) PetscFunctionReturn(0); 645 646 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 647 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 648 649 tmp = a->solve_work; 650 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 651 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 652 653 for (neq=0; neq<n; neq++){ 654 /* forward solve the lower triangular */ 655 tmp[0] = b[r[0]]; 656 tmps = tmp; 657 for (i=1; i<n; i++) { 658 v = aa + ai[i] ; 659 vi = aj + ai[i] ; 660 nz = a->diag[i] - ai[i]; 661 sum = b[r[i]]; 662 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 663 tmp[i] = sum; 664 } 665 /* backward solve the upper triangular */ 666 for (i=n-1; i>=0; i--){ 667 v = aa + a->diag[i] + 1; 668 vi = aj + a->diag[i] + 1; 669 nz = ai[i+1] - a->diag[i] - 1; 670 sum = tmp[i]; 671 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 672 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 673 } 674 675 b += n; 676 x += n; 677 } 678 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 679 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 680 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 681 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 682 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 683 PetscFunctionReturn(0); 684 } 685 686 /* ----------------------------------------------------------- */ 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 689 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 690 { 691 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 692 PetscErrorCode ierr; 693 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 694 PetscScalar *x,*b,*aa = a->a; 695 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 696 PetscInt adiag_i,i,*vi,nz,ai_i; 697 PetscScalar *v,sum; 698 #endif 699 700 PetscFunctionBegin; 701 if (!n) PetscFunctionReturn(0); 702 703 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 704 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 705 706 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 707 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 708 #else 709 /* forward solve the lower triangular */ 710 x[0] = b[0]; 711 for (i=1; i<n; i++) { 712 ai_i = ai[i]; 713 v = aa + ai_i; 714 vi = aj + ai_i; 715 nz = adiag[i] - ai_i; 716 sum = b[i]; 717 while (nz--) sum -= *v++ * x[*vi++]; 718 x[i] = sum; 719 } 720 721 /* backward solve the upper triangular */ 722 for (i=n-1; i>=0; i--){ 723 adiag_i = adiag[i]; 724 v = aa + adiag_i + 1; 725 vi = aj + adiag_i + 1; 726 nz = ai[i+1] - adiag_i - 1; 727 sum = x[i]; 728 while (nz--) sum -= *v++ * x[*vi++]; 729 x[i] = sum*aa[adiag_i]; 730 } 731 #endif 732 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 733 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 734 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 735 PetscFunctionReturn(0); 736 } 737 738 #undef __FUNCT__ 739 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 740 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 741 { 742 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 743 IS iscol = a->col,isrow = a->row; 744 PetscErrorCode ierr; 745 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 746 PetscInt nz,*rout,*cout; 747 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 748 749 PetscFunctionBegin; 750 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 751 752 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 753 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 754 tmp = a->solve_work; 755 756 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 757 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 758 759 /* forward solve the lower triangular */ 760 tmp[0] = b[*r++]; 761 for (i=1; i<n; i++) { 762 v = aa + ai[i] ; 763 vi = aj + ai[i] ; 764 nz = a->diag[i] - ai[i]; 765 sum = b[*r++]; 766 while (nz--) sum -= *v++ * tmp[*vi++ ]; 767 tmp[i] = sum; 768 } 769 770 /* backward solve the upper triangular */ 771 for (i=n-1; i>=0; i--){ 772 v = aa + a->diag[i] + 1; 773 vi = aj + a->diag[i] + 1; 774 nz = ai[i+1] - a->diag[i] - 1; 775 sum = tmp[i]; 776 while (nz--) sum -= *v++ * tmp[*vi++ ]; 777 tmp[i] = sum*aa[a->diag[i]]; 778 x[*c--] += tmp[i]; 779 } 780 781 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 782 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 783 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 784 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 785 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 786 787 PetscFunctionReturn(0); 788 } 789 /* -------------------------------------------------------------------*/ 790 #undef __FUNCT__ 791 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 792 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 793 { 794 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 795 IS iscol = a->col,isrow = a->row; 796 PetscErrorCode ierr; 797 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 798 PetscInt nz,*rout,*cout,*diag = a->diag; 799 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 800 801 PetscFunctionBegin; 802 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 803 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 804 tmp = a->solve_work; 805 806 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 807 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 808 809 /* copy the b into temp work space according to permutation */ 810 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 811 812 /* forward solve the U^T */ 813 for (i=0; i<n; i++) { 814 v = aa + diag[i] ; 815 vi = aj + diag[i] + 1; 816 nz = ai[i+1] - diag[i] - 1; 817 s1 = tmp[i]; 818 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 819 while (nz--) { 820 tmp[*vi++ ] -= (*v++)*s1; 821 } 822 tmp[i] = s1; 823 } 824 825 /* backward solve the L^T */ 826 for (i=n-1; i>=0; i--){ 827 v = aa + diag[i] - 1 ; 828 vi = aj + diag[i] - 1 ; 829 nz = diag[i] - ai[i]; 830 s1 = tmp[i]; 831 while (nz--) { 832 tmp[*vi-- ] -= (*v--)*s1; 833 } 834 } 835 836 /* copy tmp into x according to permutation */ 837 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 838 839 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 840 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 841 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 842 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 843 844 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 850 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 851 { 852 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 853 IS iscol = a->col,isrow = a->row; 854 PetscErrorCode ierr; 855 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 856 PetscInt nz,*rout,*cout,*diag = a->diag; 857 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 858 859 PetscFunctionBegin; 860 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 861 862 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 863 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 864 tmp = a->solve_work; 865 866 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 867 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 868 869 /* copy the b into temp work space according to permutation */ 870 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 871 872 /* forward solve the U^T */ 873 for (i=0; i<n; i++) { 874 v = aa + diag[i] ; 875 vi = aj + diag[i] + 1; 876 nz = ai[i+1] - diag[i] - 1; 877 tmp[i] *= *v++; 878 while (nz--) { 879 tmp[*vi++ ] -= (*v++)*tmp[i]; 880 } 881 } 882 883 /* backward solve the L^T */ 884 for (i=n-1; i>=0; i--){ 885 v = aa + diag[i] - 1 ; 886 vi = aj + diag[i] - 1 ; 887 nz = diag[i] - ai[i]; 888 while (nz--) { 889 tmp[*vi-- ] -= (*v--)*tmp[i]; 890 } 891 } 892 893 /* copy tmp into x according to permutation */ 894 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 895 896 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 897 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 898 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 899 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 900 901 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } 904 /* ----------------------------------------------------------------*/ 905 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 906 907 #undef __FUNCT__ 908 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 909 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 910 { 911 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 912 IS isicol; 913 PetscErrorCode ierr; 914 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j; 915 PetscInt *bi,*cols,nnz,*cols_lvl; 916 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 917 PetscInt i,levels,diagonal_fill; 918 PetscTruth col_identity,row_identity; 919 PetscReal f; 920 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 921 PetscBT lnkbt; 922 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 923 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 924 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 925 926 PetscFunctionBegin; 927 f = info->fill; 928 levels = (PetscInt)info->levels; 929 diagonal_fill = (PetscInt)info->diagonal_fill; 930 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 931 932 /* special case that simply copies fill pattern */ 933 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 934 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 935 if (!levels && row_identity && col_identity) { 936 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 937 (*fact)->factor = FACTOR_LU; 938 b = (Mat_SeqAIJ*)(*fact)->data; 939 if (!b->diag) { 940 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 941 } 942 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 943 b->row = isrow; 944 b->col = iscol; 945 b->icol = isicol; 946 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 947 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 948 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 949 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 950 PetscFunctionReturn(0); 951 } 952 953 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 954 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 955 956 /* get new row pointers */ 957 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 958 bi[0] = 0; 959 /* bdiag is location of diagonal in factor */ 960 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 961 bdiag[0] = 0; 962 963 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 964 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 965 966 /* create a linked list for storing column indices of the active row */ 967 nlnk = n + 1; 968 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 969 970 /* initial FreeSpace size is f*(ai[n]+1) */ 971 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 972 current_space = free_space; 973 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 974 current_space_lvl = free_space_lvl; 975 976 for (i=0; i<n; i++) { 977 nzi = 0; 978 /* copy current row into linked list */ 979 nnz = ai[r[i]+1] - ai[r[i]]; 980 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 981 cols = aj + ai[r[i]]; 982 lnk[i] = -1; /* marker to indicate if diagonal exists */ 983 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 984 nzi += nlnk; 985 986 /* make sure diagonal entry is included */ 987 if (diagonal_fill && lnk[i] == -1) { 988 fm = n; 989 while (lnk[fm] < i) fm = lnk[fm]; 990 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 991 lnk[fm] = i; 992 lnk_lvl[i] = 0; 993 nzi++; dcount++; 994 } 995 996 /* add pivot rows into the active row */ 997 nzbd = 0; 998 prow = lnk[n]; 999 while (prow < i) { 1000 nnz = bdiag[prow]; 1001 cols = bj_ptr[prow] + nnz + 1; 1002 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1003 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1004 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1005 nzi += nlnk; 1006 prow = lnk[prow]; 1007 nzbd++; 1008 } 1009 bdiag[i] = nzbd; 1010 bi[i+1] = bi[i] + nzi; 1011 1012 /* if free space is not available, make more free space */ 1013 if (current_space->local_remaining<nzi) { 1014 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1015 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1016 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1017 reallocs++; 1018 } 1019 1020 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1021 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1022 bj_ptr[i] = current_space->array; 1023 bjlvl_ptr[i] = current_space_lvl->array; 1024 1025 /* make sure the active row i has diagonal entry */ 1026 if (*(bj_ptr[i]+bdiag[i]) != i) { 1027 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1028 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1029 } 1030 1031 current_space->array += nzi; 1032 current_space->local_used += nzi; 1033 current_space->local_remaining -= nzi; 1034 current_space_lvl->array += nzi; 1035 current_space_lvl->local_used += nzi; 1036 current_space_lvl->local_remaining -= nzi; 1037 } 1038 1039 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1040 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1041 1042 /* destroy list of free space and other temporary arrays */ 1043 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1044 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1045 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1046 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1047 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1048 1049 #if defined(PETSC_USE_INFO) 1050 { 1051 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1052 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1053 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1054 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1055 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1056 if (diagonal_fill) { 1057 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1058 } 1059 } 1060 #endif 1061 1062 /* put together the new matrix */ 1063 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1064 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1065 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1066 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1067 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1068 b = (Mat_SeqAIJ*)(*fact)->data; 1069 b->freedata = PETSC_TRUE; 1070 b->singlemalloc = PETSC_FALSE; 1071 len = (bi[n] )*sizeof(PetscScalar); 1072 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1073 b->j = bj; 1074 b->i = bi; 1075 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1076 b->diag = bdiag; 1077 b->ilen = 0; 1078 b->imax = 0; 1079 b->row = isrow; 1080 b->col = iscol; 1081 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1082 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1083 b->icol = isicol; 1084 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1085 /* In b structure: Free imax, ilen, old a, old j. 1086 Allocate bdiag, solve_work, new a, new j */ 1087 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1088 b->maxnz = b->nz = bi[n] ; 1089 (*fact)->factor = FACTOR_LU; 1090 (*fact)->info.factor_mallocs = reallocs; 1091 (*fact)->info.fill_ratio_given = f; 1092 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1093 1094 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1095 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1096 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #include "src/mat/impls/sbaij/seq/sbaij.h" 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1103 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1104 { 1105 Mat C = *B; 1106 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1107 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1108 IS ip=b->row; 1109 PetscErrorCode ierr; 1110 PetscInt *rip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1111 PetscInt *ai=a->i,*aj=a->j; 1112 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1113 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1114 PetscReal zeropivot,rs,shiftnz; 1115 PetscReal shiftpd; 1116 ChShift_Ctx sctx; 1117 PetscInt newshift; 1118 1119 PetscFunctionBegin; 1120 shiftnz = info->shiftnz; 1121 shiftpd = info->shiftpd; 1122 zeropivot = info->zeropivot; 1123 1124 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1125 1126 /* initialization */ 1127 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1128 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1129 jl = il + mbs; 1130 rtmp = (MatScalar*)(jl + mbs); 1131 1132 sctx.shift_amount = 0; 1133 sctx.nshift = 0; 1134 do { 1135 sctx.chshift = PETSC_FALSE; 1136 for (i=0; i<mbs; i++) { 1137 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1138 } 1139 1140 for (k = 0; k<mbs; k++){ 1141 bval = ba + bi[k]; 1142 /* initialize k-th row by the perm[k]-th row of A */ 1143 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1144 for (j = jmin; j < jmax; j++){ 1145 col = rip[aj[j]]; 1146 if (col >= k){ /* only take upper triangular entry */ 1147 rtmp[col] = aa[j]; 1148 *bval++ = 0.0; /* for in-place factorization */ 1149 } 1150 } 1151 /* shift the diagonal of the matrix */ 1152 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1153 1154 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1155 dk = rtmp[k]; 1156 i = jl[k]; /* first row to be added to k_th row */ 1157 1158 while (i < k){ 1159 nexti = jl[i]; /* next row to be added to k_th row */ 1160 1161 /* compute multiplier, update diag(k) and U(i,k) */ 1162 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1163 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1164 dk += uikdi*ba[ili]; 1165 ba[ili] = uikdi; /* -U(i,k) */ 1166 1167 /* add multiple of row i to k-th row */ 1168 jmin = ili + 1; jmax = bi[i+1]; 1169 if (jmin < jmax){ 1170 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1171 /* update il and jl for row i */ 1172 il[i] = jmin; 1173 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1174 } 1175 i = nexti; 1176 } 1177 1178 /* shift the diagonals when zero pivot is detected */ 1179 /* compute rs=sum of abs(off-diagonal) */ 1180 rs = 0.0; 1181 jmin = bi[k]+1; 1182 nz = bi[k+1] - jmin; 1183 if (nz){ 1184 bcol = bj + jmin; 1185 while (nz--){ 1186 rs += PetscAbsScalar(rtmp[*bcol]); 1187 bcol++; 1188 } 1189 } 1190 1191 sctx.rs = rs; 1192 sctx.pv = dk; 1193 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1194 if (newshift == 1){ 1195 break; /* sctx.shift_amount is updated */ 1196 } else if (newshift == -1){ 1197 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %G tolerance %G * rs %G",k,PetscAbsScalar(dk),zeropivot,rs); 1198 } 1199 1200 /* copy data into U(k,:) */ 1201 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1202 jmin = bi[k]+1; jmax = bi[k+1]; 1203 if (jmin < jmax) { 1204 for (j=jmin; j<jmax; j++){ 1205 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1206 } 1207 /* add the k-th row into il and jl */ 1208 il[k] = jmin; 1209 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1210 } 1211 } 1212 } while (sctx.chshift); 1213 ierr = PetscFree(il);CHKERRQ(ierr); 1214 1215 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1216 C->factor = FACTOR_CHOLESKY; 1217 C->assembled = PETSC_TRUE; 1218 C->preallocated = PETSC_TRUE; 1219 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1220 if (sctx.nshift){ 1221 if (shiftnz) { 1222 ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1223 } else if (shiftpd) { 1224 ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1225 } 1226 } 1227 PetscFunctionReturn(0); 1228 } 1229 1230 #undef __FUNCT__ 1231 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1232 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1233 { 1234 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1235 Mat_SeqSBAIJ *b; 1236 Mat B; 1237 PetscErrorCode ierr; 1238 PetscTruth perm_identity; 1239 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1240 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1241 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1242 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1243 PetscReal fill=info->fill,levels=info->levels; 1244 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1245 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1246 PetscBT lnkbt; 1247 1248 PetscFunctionBegin; 1249 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1250 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1251 1252 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1253 ui[0] = 0; 1254 1255 /* special case that simply copies fill pattern */ 1256 if (!levels && perm_identity) { 1257 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1258 for (i=0; i<am; i++) { 1259 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1260 } 1261 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1262 cols = uj; 1263 for (i=0; i<am; i++) { 1264 aj = a->j + a->diag[i]; 1265 ncols = ui[i+1] - ui[i]; 1266 for (j=0; j<ncols; j++) *cols++ = *aj++; 1267 } 1268 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1269 /* initialization */ 1270 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1271 1272 /* jl: linked list for storing indices of the pivot rows 1273 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1274 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1275 il = jl + am; 1276 uj_ptr = (PetscInt**)(il + am); 1277 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1278 for (i=0; i<am; i++){ 1279 jl[i] = am; il[i] = 0; 1280 } 1281 1282 /* create and initialize a linked list for storing column indices of the active row k */ 1283 nlnk = am + 1; 1284 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1285 1286 /* initial FreeSpace size is fill*(ai[am]+1) */ 1287 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1288 current_space = free_space; 1289 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1290 current_space_lvl = free_space_lvl; 1291 1292 for (k=0; k<am; k++){ /* for each active row k */ 1293 /* initialize lnk by the column indices of row rip[k] of A */ 1294 nzk = 0; 1295 ncols = ai[rip[k]+1] - ai[rip[k]]; 1296 ncols_upper = 0; 1297 for (j=0; j<ncols; j++){ 1298 i = *(aj + ai[rip[k]] + j); 1299 if (rip[i] >= k){ /* only take upper triangular entry */ 1300 ajtmp[ncols_upper] = i; 1301 ncols_upper++; 1302 } 1303 } 1304 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1305 nzk += nlnk; 1306 1307 /* update lnk by computing fill-in for each pivot row to be merged in */ 1308 prow = jl[k]; /* 1st pivot row */ 1309 1310 while (prow < k){ 1311 nextprow = jl[prow]; 1312 1313 /* merge prow into k-th row */ 1314 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1315 jmax = ui[prow+1]; 1316 ncols = jmax-jmin; 1317 i = jmin - ui[prow]; 1318 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1319 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1320 j = *(uj - 1); 1321 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1322 nzk += nlnk; 1323 1324 /* update il and jl for prow */ 1325 if (jmin < jmax){ 1326 il[prow] = jmin; 1327 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1328 } 1329 prow = nextprow; 1330 } 1331 1332 /* if free space is not available, make more free space */ 1333 if (current_space->local_remaining<nzk) { 1334 i = am - k + 1; /* num of unfactored rows */ 1335 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1336 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1337 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1338 reallocs++; 1339 } 1340 1341 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1342 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1343 1344 /* add the k-th row into il and jl */ 1345 if (nzk > 1){ 1346 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1347 jl[k] = jl[i]; jl[i] = k; 1348 il[k] = ui[k] + 1; 1349 } 1350 uj_ptr[k] = current_space->array; 1351 uj_lvl_ptr[k] = current_space_lvl->array; 1352 1353 current_space->array += nzk; 1354 current_space->local_used += nzk; 1355 current_space->local_remaining -= nzk; 1356 1357 current_space_lvl->array += nzk; 1358 current_space_lvl->local_used += nzk; 1359 current_space_lvl->local_remaining -= nzk; 1360 1361 ui[k+1] = ui[k] + nzk; 1362 } 1363 1364 #if defined(PETSC_USE_INFO) 1365 if (ai[am] != 0) { 1366 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1367 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1368 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1369 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1370 } else { 1371 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1372 } 1373 #endif 1374 1375 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1376 ierr = PetscFree(jl);CHKERRQ(ierr); 1377 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1378 1379 /* destroy list of free space and other temporary array(s) */ 1380 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1381 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1382 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1383 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1384 1385 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1386 1387 /* put together the new matrix in MATSEQSBAIJ format */ 1388 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1389 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1390 B = *fact; 1391 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1392 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1393 1394 b = (Mat_SeqSBAIJ*)B->data; 1395 b->singlemalloc = PETSC_FALSE; 1396 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1397 b->j = uj; 1398 b->i = ui; 1399 b->diag = 0; 1400 b->ilen = 0; 1401 b->imax = 0; 1402 b->row = perm; 1403 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1404 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1405 b->icol = perm; 1406 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1407 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1408 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1409 b->maxnz = b->nz = ui[am]; 1410 1411 B->factor = FACTOR_CHOLESKY; 1412 B->info.factor_mallocs = reallocs; 1413 B->info.fill_ratio_given = fill; 1414 if (ai[am] != 0) { 1415 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1416 } else { 1417 B->info.fill_ratio_needed = 0.0; 1418 } 1419 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1420 if (perm_identity){ 1421 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1422 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1423 } 1424 PetscFunctionReturn(0); 1425 } 1426 1427 #undef __FUNCT__ 1428 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1429 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1430 { 1431 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1432 Mat_SeqSBAIJ *b; 1433 Mat B; 1434 PetscErrorCode ierr; 1435 PetscTruth perm_identity; 1436 PetscReal fill = info->fill; 1437 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1438 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1439 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1440 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1441 PetscBT lnkbt; 1442 IS iperm; 1443 1444 PetscFunctionBegin; 1445 /* check whether perm is the identity mapping */ 1446 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1447 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1448 1449 if (!perm_identity){ 1450 /* check if perm is symmetric! */ 1451 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1452 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1453 for (i=0; i<am; i++) { 1454 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1455 } 1456 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1457 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1458 } 1459 1460 /* initialization */ 1461 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1462 ui[0] = 0; 1463 1464 /* jl: linked list for storing indices of the pivot rows 1465 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1466 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1467 il = jl + am; 1468 cols = il + am; 1469 ui_ptr = (PetscInt**)(cols + am); 1470 for (i=0; i<am; i++){ 1471 jl[i] = am; il[i] = 0; 1472 } 1473 1474 /* create and initialize a linked list for storing column indices of the active row k */ 1475 nlnk = am + 1; 1476 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1477 1478 /* initial FreeSpace size is fill*(ai[am]+1) */ 1479 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1480 current_space = free_space; 1481 1482 for (k=0; k<am; k++){ /* for each active row k */ 1483 /* initialize lnk by the column indices of row rip[k] of A */ 1484 nzk = 0; 1485 ncols = ai[rip[k]+1] - ai[rip[k]]; 1486 ncols_upper = 0; 1487 for (j=0; j<ncols; j++){ 1488 i = rip[*(aj + ai[rip[k]] + j)]; 1489 if (i >= k){ /* only take upper triangular entry */ 1490 cols[ncols_upper] = i; 1491 ncols_upper++; 1492 } 1493 } 1494 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1495 nzk += nlnk; 1496 1497 /* update lnk by computing fill-in for each pivot row to be merged in */ 1498 prow = jl[k]; /* 1st pivot row */ 1499 1500 while (prow < k){ 1501 nextprow = jl[prow]; 1502 /* merge prow into k-th row */ 1503 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1504 jmax = ui[prow+1]; 1505 ncols = jmax-jmin; 1506 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1507 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1508 nzk += nlnk; 1509 1510 /* update il and jl for prow */ 1511 if (jmin < jmax){ 1512 il[prow] = jmin; 1513 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1514 } 1515 prow = nextprow; 1516 } 1517 1518 /* if free space is not available, make more free space */ 1519 if (current_space->local_remaining<nzk) { 1520 i = am - k + 1; /* num of unfactored rows */ 1521 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1522 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1523 reallocs++; 1524 } 1525 1526 /* copy data into free space, then initialize lnk */ 1527 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1528 1529 /* add the k-th row into il and jl */ 1530 if (nzk-1 > 0){ 1531 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1532 jl[k] = jl[i]; jl[i] = k; 1533 il[k] = ui[k] + 1; 1534 } 1535 ui_ptr[k] = current_space->array; 1536 current_space->array += nzk; 1537 current_space->local_used += nzk; 1538 current_space->local_remaining -= nzk; 1539 1540 ui[k+1] = ui[k] + nzk; 1541 } 1542 1543 #if defined(PETSC_USE_INFO) 1544 if (ai[am] != 0) { 1545 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1546 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1547 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1548 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1549 } else { 1550 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1551 } 1552 #endif 1553 1554 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1555 ierr = PetscFree(jl);CHKERRQ(ierr); 1556 1557 /* destroy list of free space and other temporary array(s) */ 1558 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1559 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1560 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1561 1562 /* put together the new matrix in MATSEQSBAIJ format */ 1563 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1564 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1565 B = *fact; 1566 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1567 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1568 1569 b = (Mat_SeqSBAIJ*)B->data; 1570 b->singlemalloc = PETSC_FALSE; 1571 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1572 b->j = uj; 1573 b->i = ui; 1574 b->diag = 0; 1575 b->ilen = 0; 1576 b->imax = 0; 1577 b->row = perm; 1578 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1579 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1580 b->icol = perm; 1581 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1582 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1583 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1584 b->maxnz = b->nz = ui[am]; 1585 1586 B->factor = FACTOR_CHOLESKY; 1587 B->info.factor_mallocs = reallocs; 1588 B->info.fill_ratio_given = fill; 1589 if (ai[am] != 0) { 1590 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1591 } else { 1592 B->info.fill_ratio_needed = 0.0; 1593 } 1594 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1595 if (perm_identity){ 1596 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1597 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601