1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 23 24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 28 #endif 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 32 /* ------------------------------------------------------------ 33 34 This interface was contribed by Tony Caola 35 36 This routine is an interface to the pivoting drop-tolerance 37 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 38 SPARSEKIT2. 39 40 The SPARSEKIT2 routines used here are covered by the GNU 41 copyright; see the file gnu in this directory. 42 43 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 44 help in getting this routine ironed out. 45 46 The major drawback to this routine is that if info->fill is 47 not large enough it fails rather than allocating more space; 48 this can be fixed by hacking/improving the f2c version of 49 Yousef Saad's code. 50 51 ------------------------------------------------------------ 52 */ 53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 54 { 55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 56 PetscFunctionBegin; 57 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 58 You can obtain the drop tolerance routines by installing PETSc from\n\ 59 www.mcs.anl.gov/petsc\n"); 60 #else 61 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 62 IS iscolf,isicol,isirow; 63 PetscTruth reorder; 64 PetscErrorCode ierr,sierr; 65 PetscInt *c,*r,*ic,i,n = A->m; 66 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 67 PetscInt *ordcol,*iwk,*iperm,*jw; 68 PetscInt jmax,lfill,job,*o_i,*o_j; 69 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 70 PetscReal af; 71 72 PetscFunctionBegin; 73 74 if (info->dt == PETSC_DEFAULT) info->dt = .005; 75 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 76 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 77 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 78 lfill = (PetscInt)(info->dtcount/2.0); 79 jmax = (PetscInt)(info->fill*a->nz); 80 81 82 /* ------------------------------------------------------------ 83 If reorder=.TRUE., then the original matrix has to be 84 reordered to reflect the user selected ordering scheme, and 85 then de-reordered so it is in it's original format. 86 Because Saad's dperm() is NOT in place, we have to copy 87 the original matrix and allocate more storage. . . 88 ------------------------------------------------------------ 89 */ 90 91 /* set reorder to true if either isrow or iscol is not identity */ 92 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 93 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 94 reorder = PetscNot(reorder); 95 96 97 /* storage for ilu factor */ 98 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 100 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 101 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 102 103 /* ------------------------------------------------------------ 104 Make sure that everything is Fortran formatted (1-Based) 105 ------------------------------------------------------------ 106 */ 107 for (i=old_i[0];i<old_i[n];i++) { 108 old_j[i]++; 109 } 110 for(i=0;i<n+1;i++) { 111 old_i[i]++; 112 }; 113 114 115 if (reorder) { 116 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 117 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 r[i] = r[i]+1; 120 c[i] = c[i]+1; 121 } 122 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 123 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 124 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 125 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 126 for (i=0;i<n;i++) { 127 r[i] = r[i]-1; 128 c[i] = c[i]-1; 129 } 130 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 131 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 132 o_a = old_a2; 133 o_j = old_j2; 134 o_i = old_i2; 135 } else { 136 o_a = old_a; 137 o_j = old_j; 138 o_i = old_i; 139 } 140 141 /* ------------------------------------------------------------ 142 Call Saad's ilutp() routine to generate the factorization 143 ------------------------------------------------------------ 144 */ 145 146 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 147 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 148 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 149 150 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 151 if (sierr) { 152 switch (sierr) { 153 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 154 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 155 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 156 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 157 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 158 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 159 } 160 } 161 162 ierr = PetscFree(w);CHKERRQ(ierr); 163 ierr = PetscFree(jw);CHKERRQ(ierr); 164 165 /* ------------------------------------------------------------ 166 Saad's routine gives the result in Modified Sparse Row (msr) 167 Convert to Compressed Sparse Row format (csr) 168 ------------------------------------------------------------ 169 */ 170 171 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 172 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 173 174 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 175 176 ierr = PetscFree(iwk);CHKERRQ(ierr); 177 ierr = PetscFree(wk);CHKERRQ(ierr); 178 179 if (reorder) { 180 ierr = PetscFree(old_a2);CHKERRQ(ierr); 181 ierr = PetscFree(old_j2);CHKERRQ(ierr); 182 ierr = PetscFree(old_i2);CHKERRQ(ierr); 183 } else { 184 /* fix permutation of old_j that the factorization introduced */ 185 for (i=old_i[0]; i<old_i[n]; i++) { 186 old_j[i-1] = iperm[old_j[i-1]-1]; 187 } 188 } 189 190 /* get rid of the shift to indices starting at 1 */ 191 for (i=0; i<n+1; i++) { 192 old_i[i]--; 193 } 194 for (i=old_i[0];i<old_i[n];i++) { 195 old_j[i]--; 196 } 197 198 /* Make the factored matrix 0-based */ 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 206 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 207 /*-- permute the right-hand-side and solution vectors --*/ 208 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 209 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 210 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 211 for(i=0; i<n; i++) { 212 ordcol[i] = ic[iperm[i]-1]; 213 }; 214 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 215 ierr = ISDestroy(isicol);CHKERRQ(ierr); 216 217 ierr = PetscFree(iperm);CHKERRQ(ierr); 218 219 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 220 ierr = PetscFree(ordcol);CHKERRQ(ierr); 221 222 /*----- put together the new matrix -----*/ 223 224 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 225 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 226 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 227 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 228 (*fact)->factor = FACTOR_LU; 229 (*fact)->assembled = PETSC_TRUE; 230 231 b = (Mat_SeqAIJ*)(*fact)->data; 232 b->freedata = PETSC_TRUE; 233 b->sorted = PETSC_FALSE; 234 b->singlemalloc = PETSC_FALSE; 235 b->a = new_a; 236 b->j = new_j; 237 b->i = new_i; 238 b->ilen = 0; 239 b->imax = 0; 240 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 241 b->row = isirow; 242 b->col = iscolf; 243 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 244 b->maxnz = b->nz = new_i[n]; 245 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 246 (*fact)->info.factor_mallocs = 0; 247 248 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 249 250 af = ((double)b->nz)/((double)a->nz) + .001; 251 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr); 252 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 253 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 254 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 255 256 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 257 258 PetscFunctionReturn(0); 259 #endif 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 265 { 266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 267 IS isicol; 268 PetscErrorCode ierr; 269 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 270 PetscInt *bi,*bj,*ajtmp; 271 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 272 PetscReal f; 273 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 274 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 275 PetscBT lnkbt; 276 277 PetscFunctionBegin; 278 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 279 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 280 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 281 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 282 283 /* get new row pointers */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 285 bi[0] = 0; 286 287 /* bdiag is location of diagonal in factor */ 288 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 289 bdiag[0] = 0; 290 291 /* linked list for storing column indices of the active row */ 292 nlnk = n + 1; 293 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 294 295 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 296 im = cols + n; 297 bi_ptr = (PetscInt**)(im + n); 298 299 /* initial FreeSpace size is f*(ai[n]+1) */ 300 f = info->fill; 301 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 302 current_space = free_space; 303 304 for (i=0; i<n; i++) { 305 /* copy previous fill into linked list */ 306 nzi = 0; 307 nnz = ai[r[i]+1] - ai[r[i]]; 308 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 309 ajtmp = aj + ai[r[i]]; 310 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 311 nzi += nlnk; 312 313 /* add pivot rows into linked list */ 314 row = lnk[n]; 315 while (row < i) { 316 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 317 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 318 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 319 nzi += nlnk; 320 row = lnk[row]; 321 } 322 bi[i+1] = bi[i] + nzi; 323 im[i] = nzi; 324 325 /* mark bdiag */ 326 nzbd = 0; 327 nnz = nzi; 328 k = lnk[n]; 329 while (nnz-- && k < i){ 330 nzbd++; 331 k = lnk[k]; 332 } 333 bdiag[i] = bi[i] + nzbd; 334 335 /* if free space is not available, make more free space */ 336 if (current_space->local_remaining<nzi) { 337 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 338 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 339 reallocs++; 340 } 341 342 /* copy data into free space, then initialize lnk */ 343 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 344 bi_ptr[i] = current_space->array; 345 current_space->array += nzi; 346 current_space->local_used += nzi; 347 current_space->local_remaining -= nzi; 348 } 349 #if defined(PETSC_USE_DEBUG) 350 if (ai[n] != 0) { 351 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 352 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 353 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %G or use \n",af));CHKERRQ(ierr); 354 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 355 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 356 } else { 357 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr); 358 } 359 #endif 360 361 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 362 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 363 364 /* destroy list of free space and other temporary array(s) */ 365 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 366 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 367 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 368 ierr = PetscFree(cols);CHKERRQ(ierr); 369 370 /* put together the new matrix */ 371 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 372 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 373 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 374 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 375 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 376 b = (Mat_SeqAIJ*)(*B)->data; 377 b->freedata = PETSC_TRUE; 378 b->singlemalloc = PETSC_FALSE; 379 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 380 b->j = bj; 381 b->i = bi; 382 b->diag = bdiag; 383 b->ilen = 0; 384 b->imax = 0; 385 b->row = isrow; 386 b->col = iscol; 387 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 388 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 389 b->icol = isicol; 390 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 391 392 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 393 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 394 b->maxnz = b->nz = bi[n] ; 395 396 (*B)->factor = FACTOR_LU; 397 (*B)->info.factor_mallocs = reallocs; 398 (*B)->info.fill_ratio_given = f; 399 400 if (ai[n] != 0) { 401 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 402 } else { 403 (*B)->info.fill_ratio_needed = 0.0; 404 } 405 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 406 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 407 PetscFunctionReturn(0); 408 } 409 410 /* ----------------------------------------------------------- */ 411 #undef __FUNCT__ 412 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 413 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 414 { 415 Mat C=*B; 416 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 417 IS isrow = b->row,isicol = b->icol; 418 PetscErrorCode ierr; 419 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 420 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 421 PetscInt *diag_offset = b->diag,diag,*pj; 422 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 423 PetscScalar d; 424 PetscReal rs; 425 LUShift_Ctx sctx; 426 PetscInt newshift; 427 428 PetscFunctionBegin; 429 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 430 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 431 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 432 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 433 rtmps = rtmp; ics = ic; 434 435 if (!a->diag) { 436 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 437 } 438 /* if both shift schemes are chosen by user, only use info->shiftpd */ 439 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 440 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 441 PetscInt *aai = a->i,*ddiag = a->diag; 442 sctx.shift_top = 0; 443 for (i=0; i<n; i++) { 444 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 445 d = (a->a)[ddiag[i]]; 446 rs = -PetscAbsScalar(d) - PetscRealPart(d); 447 v = a->a+aai[i]; 448 nz = aai[i+1] - aai[i]; 449 for (j=0; j<nz; j++) 450 rs += PetscAbsScalar(v[j]); 451 if (rs>sctx.shift_top) sctx.shift_top = rs; 452 } 453 if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12; 454 sctx.shift_top *= 1.1; 455 sctx.nshift_max = 5; 456 sctx.shift_lo = 0.; 457 sctx.shift_hi = 1.; 458 } 459 460 sctx.shift_amount = 0; 461 sctx.nshift = 0; 462 do { 463 sctx.lushift = PETSC_FALSE; 464 for (i=0; i<n; i++){ 465 nz = bi[i+1] - bi[i]; 466 bjtmp = bj + bi[i]; 467 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 468 469 /* load in initial (unfactored row) */ 470 nz = a->i[r[i]+1] - a->i[r[i]]; 471 ajtmp = a->j + a->i[r[i]]; 472 v = a->a + a->i[r[i]]; 473 for (j=0; j<nz; j++) { 474 rtmp[ics[ajtmp[j]]] = v[j]; 475 } 476 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 477 478 row = *bjtmp++; 479 while (row < i) { 480 pc = rtmp + row; 481 if (*pc != 0.0) { 482 pv = b->a + diag_offset[row]; 483 pj = b->j + diag_offset[row] + 1; 484 multiplier = *pc / *pv++; 485 *pc = multiplier; 486 nz = bi[row+1] - diag_offset[row] - 1; 487 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 488 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 489 } 490 row = *bjtmp++; 491 } 492 /* finished row so stick it into b->a */ 493 pv = b->a + bi[i] ; 494 pj = b->j + bi[i] ; 495 nz = bi[i+1] - bi[i]; 496 diag = diag_offset[i] - bi[i]; 497 rs = 0.0; 498 for (j=0; j<nz; j++) { 499 pv[j] = rtmps[pj[j]]; 500 if (j != diag) rs += PetscAbsScalar(pv[j]); 501 } 502 503 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 504 sctx.rs = rs; 505 sctx.pv = pv[diag]; 506 ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 507 if (newshift == 1){ 508 break; /* sctx.shift_amount is updated */ 509 } else if (newshift == -1){ 510 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 511 } 512 } 513 514 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 515 /* 516 * if no shift in this attempt & shifting & started shifting & can refine, 517 * then try lower shift 518 */ 519 sctx.shift_hi = info->shift_fraction; 520 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 521 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 522 sctx.lushift = PETSC_TRUE; 523 sctx.nshift++; 524 } 525 } while (sctx.lushift); 526 527 /* invert diagonal entries for simplier triangular solves */ 528 for (i=0; i<n; i++) { 529 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 530 } 531 532 ierr = PetscFree(rtmp);CHKERRQ(ierr); 533 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 534 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 535 C->factor = FACTOR_LU; 536 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 537 C->assembled = PETSC_TRUE; 538 ierr = PetscLogFlops(C->n);CHKERRQ(ierr); 539 if (sctx.nshift){ 540 if (info->shiftnz) { 541 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 542 } else if (info->shiftpd) { 543 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr); 544 } 545 } 546 PetscFunctionReturn(0); 547 } 548 549 #undef __FUNCT__ 550 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 551 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 552 { 553 PetscFunctionBegin; 554 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 555 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 556 PetscFunctionReturn(0); 557 } 558 559 560 /* ----------------------------------------------------------- */ 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatLUFactor_SeqAIJ" 563 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 564 { 565 PetscErrorCode ierr; 566 Mat C; 567 568 PetscFunctionBegin; 569 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 570 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 571 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 572 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 573 PetscFunctionReturn(0); 574 } 575 /* ----------------------------------------------------------- */ 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatSolve_SeqAIJ" 578 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 579 { 580 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 581 IS iscol = a->col,isrow = a->row; 582 PetscErrorCode ierr; 583 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 584 PetscInt nz,*rout,*cout; 585 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 586 587 PetscFunctionBegin; 588 if (!n) PetscFunctionReturn(0); 589 590 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 591 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 592 tmp = a->solve_work; 593 594 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 595 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 596 597 /* forward solve the lower triangular */ 598 tmp[0] = b[*r++]; 599 tmps = tmp; 600 for (i=1; i<n; i++) { 601 v = aa + ai[i] ; 602 vi = aj + ai[i] ; 603 nz = a->diag[i] - ai[i]; 604 sum = b[*r++]; 605 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 606 tmp[i] = sum; 607 } 608 609 /* backward solve the upper triangular */ 610 for (i=n-1; i>=0; i--){ 611 v = aa + a->diag[i] + 1; 612 vi = aj + a->diag[i] + 1; 613 nz = ai[i+1] - a->diag[i] - 1; 614 sum = tmp[i]; 615 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 616 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 617 } 618 619 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 620 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 621 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 622 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 623 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 624 PetscFunctionReturn(0); 625 } 626 627 /* ----------------------------------------------------------- */ 628 #undef __FUNCT__ 629 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 630 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 631 { 632 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 633 PetscErrorCode ierr; 634 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 635 PetscScalar *x,*b,*aa = a->a; 636 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 637 PetscInt adiag_i,i,*vi,nz,ai_i; 638 PetscScalar *v,sum; 639 #endif 640 641 PetscFunctionBegin; 642 if (!n) PetscFunctionReturn(0); 643 644 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 645 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 646 647 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 648 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 649 #else 650 /* forward solve the lower triangular */ 651 x[0] = b[0]; 652 for (i=1; i<n; i++) { 653 ai_i = ai[i]; 654 v = aa + ai_i; 655 vi = aj + ai_i; 656 nz = adiag[i] - ai_i; 657 sum = b[i]; 658 while (nz--) sum -= *v++ * x[*vi++]; 659 x[i] = sum; 660 } 661 662 /* backward solve the upper triangular */ 663 for (i=n-1; i>=0; i--){ 664 adiag_i = adiag[i]; 665 v = aa + adiag_i + 1; 666 vi = aj + adiag_i + 1; 667 nz = ai[i+1] - adiag_i - 1; 668 sum = x[i]; 669 while (nz--) sum -= *v++ * x[*vi++]; 670 x[i] = sum*aa[adiag_i]; 671 } 672 #endif 673 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 674 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 675 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 } 678 679 #undef __FUNCT__ 680 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 681 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 682 { 683 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 684 IS iscol = a->col,isrow = a->row; 685 PetscErrorCode ierr; 686 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 687 PetscInt nz,*rout,*cout; 688 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 689 690 PetscFunctionBegin; 691 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 692 693 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 694 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 695 tmp = a->solve_work; 696 697 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 698 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 699 700 /* forward solve the lower triangular */ 701 tmp[0] = b[*r++]; 702 for (i=1; i<n; i++) { 703 v = aa + ai[i] ; 704 vi = aj + ai[i] ; 705 nz = a->diag[i] - ai[i]; 706 sum = b[*r++]; 707 while (nz--) sum -= *v++ * tmp[*vi++ ]; 708 tmp[i] = sum; 709 } 710 711 /* backward solve the upper triangular */ 712 for (i=n-1; i>=0; i--){ 713 v = aa + a->diag[i] + 1; 714 vi = aj + a->diag[i] + 1; 715 nz = ai[i+1] - a->diag[i] - 1; 716 sum = tmp[i]; 717 while (nz--) sum -= *v++ * tmp[*vi++ ]; 718 tmp[i] = sum*aa[a->diag[i]]; 719 x[*c--] += tmp[i]; 720 } 721 722 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 723 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 724 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 725 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 726 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 727 728 PetscFunctionReturn(0); 729 } 730 /* -------------------------------------------------------------------*/ 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 733 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 734 { 735 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 736 IS iscol = a->col,isrow = a->row; 737 PetscErrorCode ierr; 738 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 739 PetscInt nz,*rout,*cout,*diag = a->diag; 740 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 741 742 PetscFunctionBegin; 743 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 744 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 745 tmp = a->solve_work; 746 747 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 748 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 749 750 /* copy the b into temp work space according to permutation */ 751 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 752 753 /* forward solve the U^T */ 754 for (i=0; i<n; i++) { 755 v = aa + diag[i] ; 756 vi = aj + diag[i] + 1; 757 nz = ai[i+1] - diag[i] - 1; 758 s1 = tmp[i]; 759 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 760 while (nz--) { 761 tmp[*vi++ ] -= (*v++)*s1; 762 } 763 tmp[i] = s1; 764 } 765 766 /* backward solve the L^T */ 767 for (i=n-1; i>=0; i--){ 768 v = aa + diag[i] - 1 ; 769 vi = aj + diag[i] - 1 ; 770 nz = diag[i] - ai[i]; 771 s1 = tmp[i]; 772 while (nz--) { 773 tmp[*vi-- ] -= (*v--)*s1; 774 } 775 } 776 777 /* copy tmp into x according to permutation */ 778 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 779 780 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 781 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 782 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 783 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 784 785 ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr); 786 PetscFunctionReturn(0); 787 } 788 789 #undef __FUNCT__ 790 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 791 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 792 { 793 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 794 IS iscol = a->col,isrow = a->row; 795 PetscErrorCode ierr; 796 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 797 PetscInt nz,*rout,*cout,*diag = a->diag; 798 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 799 800 PetscFunctionBegin; 801 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 802 803 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 804 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 805 tmp = a->solve_work; 806 807 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 808 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 809 810 /* copy the b into temp work space according to permutation */ 811 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 812 813 /* forward solve the U^T */ 814 for (i=0; i<n; i++) { 815 v = aa + diag[i] ; 816 vi = aj + diag[i] + 1; 817 nz = ai[i+1] - diag[i] - 1; 818 tmp[i] *= *v++; 819 while (nz--) { 820 tmp[*vi++ ] -= (*v++)*tmp[i]; 821 } 822 } 823 824 /* backward solve the L^T */ 825 for (i=n-1; i>=0; i--){ 826 v = aa + diag[i] - 1 ; 827 vi = aj + diag[i] - 1 ; 828 nz = diag[i] - ai[i]; 829 while (nz--) { 830 tmp[*vi-- ] -= (*v--)*tmp[i]; 831 } 832 } 833 834 /* copy tmp into x according to permutation */ 835 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 836 837 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 838 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 839 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 840 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 841 842 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 843 PetscFunctionReturn(0); 844 } 845 /* ----------------------------------------------------------------*/ 846 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 850 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 851 { 852 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 853 IS isicol; 854 PetscErrorCode ierr; 855 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 856 PetscInt *bi,*cols,nnz,*cols_lvl; 857 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 858 PetscInt i,levels,diagonal_fill; 859 PetscTruth col_identity,row_identity; 860 PetscReal f; 861 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 862 PetscBT lnkbt; 863 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 864 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 865 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 866 867 PetscFunctionBegin; 868 f = info->fill; 869 levels = (PetscInt)info->levels; 870 diagonal_fill = (PetscInt)info->diagonal_fill; 871 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 872 873 /* special case that simply copies fill pattern */ 874 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 875 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 876 if (!levels && row_identity && col_identity) { 877 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 878 (*fact)->factor = FACTOR_LU; 879 b = (Mat_SeqAIJ*)(*fact)->data; 880 if (!b->diag) { 881 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 882 } 883 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 884 b->row = isrow; 885 b->col = iscol; 886 b->icol = isicol; 887 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 888 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 889 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 890 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 891 PetscFunctionReturn(0); 892 } 893 894 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 895 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 896 897 /* get new row pointers */ 898 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 899 bi[0] = 0; 900 /* bdiag is location of diagonal in factor */ 901 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 902 bdiag[0] = 0; 903 904 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 905 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 906 907 /* create a linked list for storing column indices of the active row */ 908 nlnk = n + 1; 909 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 910 911 /* initial FreeSpace size is f*(ai[n]+1) */ 912 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 913 current_space = free_space; 914 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 915 current_space_lvl = free_space_lvl; 916 917 for (i=0; i<n; i++) { 918 nzi = 0; 919 /* copy current row into linked list */ 920 nnz = ai[r[i]+1] - ai[r[i]]; 921 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 922 cols = aj + ai[r[i]]; 923 lnk[i] = -1; /* marker to indicate if diagonal exists */ 924 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 925 nzi += nlnk; 926 927 /* make sure diagonal entry is included */ 928 if (diagonal_fill && lnk[i] == -1) { 929 fm = n; 930 while (lnk[fm] < i) fm = lnk[fm]; 931 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 932 lnk[fm] = i; 933 lnk_lvl[i] = 0; 934 nzi++; dcount++; 935 } 936 937 /* add pivot rows into the active row */ 938 nzbd = 0; 939 prow = lnk[n]; 940 while (prow < i) { 941 nnz = bdiag[prow]; 942 cols = bj_ptr[prow] + nnz + 1; 943 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 944 nnz = bi[prow+1] - bi[prow] - nnz - 1; 945 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 946 nzi += nlnk; 947 prow = lnk[prow]; 948 nzbd++; 949 } 950 bdiag[i] = nzbd; 951 bi[i+1] = bi[i] + nzi; 952 953 /* if free space is not available, make more free space */ 954 if (current_space->local_remaining<nzi) { 955 nnz = nzi*(n - i); /* estimated and max additional space needed */ 956 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 957 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 958 reallocs++; 959 } 960 961 /* copy data into free_space and free_space_lvl, then initialize lnk */ 962 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 963 bj_ptr[i] = current_space->array; 964 bjlvl_ptr[i] = current_space_lvl->array; 965 966 /* make sure the active row i has diagonal entry */ 967 if (*(bj_ptr[i]+bdiag[i]) != i) { 968 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 969 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 970 } 971 972 current_space->array += nzi; 973 current_space->local_used += nzi; 974 current_space->local_remaining -= nzi; 975 current_space_lvl->array += nzi; 976 current_space_lvl->local_used += nzi; 977 current_space_lvl->local_remaining -= nzi; 978 } 979 980 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 981 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 982 983 /* destroy list of free space and other temporary arrays */ 984 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 985 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 986 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 987 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 988 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 989 990 #if defined(PETSC_USE_DEBUG) 991 { 992 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 993 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 994 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 995 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr); 996 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 997 if (diagonal_fill) { 998 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr); 999 } 1000 } 1001 #endif 1002 1003 /* put together the new matrix */ 1004 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1005 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1006 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1007 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1008 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1009 b = (Mat_SeqAIJ*)(*fact)->data; 1010 b->freedata = PETSC_TRUE; 1011 b->singlemalloc = PETSC_FALSE; 1012 len = (bi[n] )*sizeof(PetscScalar); 1013 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1014 b->j = bj; 1015 b->i = bi; 1016 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1017 b->diag = bdiag; 1018 b->ilen = 0; 1019 b->imax = 0; 1020 b->row = isrow; 1021 b->col = iscol; 1022 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1023 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1024 b->icol = isicol; 1025 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1026 /* In b structure: Free imax, ilen, old a, old j. 1027 Allocate bdiag, solve_work, new a, new j */ 1028 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1029 b->maxnz = b->nz = bi[n] ; 1030 (*fact)->factor = FACTOR_LU; 1031 (*fact)->info.factor_mallocs = reallocs; 1032 (*fact)->info.fill_ratio_given = f; 1033 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1034 1035 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1036 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1037 1038 PetscFunctionReturn(0); 1039 } 1040 1041 #include "src/mat/impls/sbaij/seq/sbaij.h" 1042 #undef __FUNCT__ 1043 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1044 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1045 { 1046 Mat C = *B; 1047 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1048 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1049 IS ip=b->row; 1050 PetscErrorCode ierr; 1051 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1052 PetscInt *ai=a->i,*aj=a->j; 1053 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1054 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1055 PetscReal zeropivot,rs,shiftnz; 1056 PetscReal shiftpd; 1057 ChShift_Ctx sctx; 1058 PetscInt newshift; 1059 1060 PetscFunctionBegin; 1061 shiftnz = info->shiftnz; 1062 shiftpd = info->shiftpd; 1063 zeropivot = info->zeropivot; 1064 1065 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1066 1067 /* initialization */ 1068 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1069 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1070 jl = il + mbs; 1071 rtmp = (MatScalar*)(jl + mbs); 1072 1073 sctx.shift_amount = 0; 1074 sctx.nshift = 0; 1075 do { 1076 sctx.chshift = PETSC_FALSE; 1077 for (i=0; i<mbs; i++) { 1078 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1079 } 1080 1081 for (k = 0; k<mbs; k++){ 1082 bval = ba + bi[k]; 1083 /* initialize k-th row by the perm[k]-th row of A */ 1084 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1085 for (j = jmin; j < jmax; j++){ 1086 col = rip[aj[j]]; 1087 if (col >= k){ /* only take upper triangular entry */ 1088 rtmp[col] = aa[j]; 1089 *bval++ = 0.0; /* for in-place factorization */ 1090 } 1091 } 1092 /* shift the diagonal of the matrix */ 1093 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1094 1095 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1096 dk = rtmp[k]; 1097 i = jl[k]; /* first row to be added to k_th row */ 1098 1099 while (i < k){ 1100 nexti = jl[i]; /* next row to be added to k_th row */ 1101 1102 /* compute multiplier, update diag(k) and U(i,k) */ 1103 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1104 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1105 dk += uikdi*ba[ili]; 1106 ba[ili] = uikdi; /* -U(i,k) */ 1107 1108 /* add multiple of row i to k-th row */ 1109 jmin = ili + 1; jmax = bi[i+1]; 1110 if (jmin < jmax){ 1111 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1112 /* update il and jl for row i */ 1113 il[i] = jmin; 1114 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1115 } 1116 i = nexti; 1117 } 1118 1119 /* shift the diagonals when zero pivot is detected */ 1120 /* compute rs=sum of abs(off-diagonal) */ 1121 rs = 0.0; 1122 jmin = bi[k]+1; 1123 nz = bi[k+1] - jmin; 1124 if (nz){ 1125 bcol = bj + jmin; 1126 while (nz--){ 1127 rs += PetscAbsScalar(rtmp[*bcol]); 1128 bcol++; 1129 } 1130 } 1131 1132 sctx.rs = rs; 1133 sctx.pv = dk; 1134 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1135 if (newshift == 1){ 1136 break; /* sctx.shift_amount is updated */ 1137 } else if (newshift == -1){ 1138 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1139 } 1140 1141 /* copy data into U(k,:) */ 1142 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1143 jmin = bi[k]+1; jmax = bi[k+1]; 1144 if (jmin < jmax) { 1145 for (j=jmin; j<jmax; j++){ 1146 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1147 } 1148 /* add the k-th row into il and jl */ 1149 il[k] = jmin; 1150 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1151 } 1152 } 1153 } while (sctx.chshift); 1154 ierr = PetscFree(il);CHKERRQ(ierr); 1155 1156 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1157 C->factor = FACTOR_CHOLESKY; 1158 C->assembled = PETSC_TRUE; 1159 C->preallocated = PETSC_TRUE; 1160 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1161 if (sctx.nshift){ 1162 if (shiftnz) { 1163 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1164 } else if (shiftpd) { 1165 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1166 } 1167 } 1168 PetscFunctionReturn(0); 1169 } 1170 1171 #undef __FUNCT__ 1172 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1173 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1174 { 1175 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1176 Mat_SeqSBAIJ *b; 1177 Mat B; 1178 PetscErrorCode ierr; 1179 PetscTruth perm_identity; 1180 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1181 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1182 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1183 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1184 PetscReal fill=info->fill,levels=info->levels; 1185 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1186 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1187 PetscBT lnkbt; 1188 1189 PetscFunctionBegin; 1190 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1191 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1192 1193 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1194 ui[0] = 0; 1195 1196 /* special case that simply copies fill pattern */ 1197 if (!levels && perm_identity) { 1198 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1199 for (i=0; i<am; i++) { 1200 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1201 } 1202 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1203 cols = uj; 1204 for (i=0; i<am; i++) { 1205 aj = a->j + a->diag[i]; 1206 ncols = ui[i+1] - ui[i]; 1207 for (j=0; j<ncols; j++) *cols++ = *aj++; 1208 } 1209 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1210 /* initialization */ 1211 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1212 1213 /* jl: linked list for storing indices of the pivot rows 1214 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1215 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1216 il = jl + am; 1217 uj_ptr = (PetscInt**)(il + am); 1218 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1219 for (i=0; i<am; i++){ 1220 jl[i] = am; il[i] = 0; 1221 } 1222 1223 /* create and initialize a linked list for storing column indices of the active row k */ 1224 nlnk = am + 1; 1225 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1226 1227 /* initial FreeSpace size is fill*(ai[am]+1) */ 1228 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1229 current_space = free_space; 1230 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1231 current_space_lvl = free_space_lvl; 1232 1233 for (k=0; k<am; k++){ /* for each active row k */ 1234 /* initialize lnk by the column indices of row rip[k] of A */ 1235 nzk = 0; 1236 ncols = ai[rip[k]+1] - ai[rip[k]]; 1237 ncols_upper = 0; 1238 for (j=0; j<ncols; j++){ 1239 i = *(aj + ai[rip[k]] + j); 1240 if (rip[i] >= k){ /* only take upper triangular entry */ 1241 ajtmp[ncols_upper] = i; 1242 ncols_upper++; 1243 } 1244 } 1245 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1246 nzk += nlnk; 1247 1248 /* update lnk by computing fill-in for each pivot row to be merged in */ 1249 prow = jl[k]; /* 1st pivot row */ 1250 1251 while (prow < k){ 1252 nextprow = jl[prow]; 1253 1254 /* merge prow into k-th row */ 1255 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1256 jmax = ui[prow+1]; 1257 ncols = jmax-jmin; 1258 i = jmin - ui[prow]; 1259 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1260 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1261 j = *(uj - 1); 1262 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1263 nzk += nlnk; 1264 1265 /* update il and jl for prow */ 1266 if (jmin < jmax){ 1267 il[prow] = jmin; 1268 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1269 } 1270 prow = nextprow; 1271 } 1272 1273 /* if free space is not available, make more free space */ 1274 if (current_space->local_remaining<nzk) { 1275 i = am - k + 1; /* num of unfactored rows */ 1276 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1277 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1278 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1279 reallocs++; 1280 } 1281 1282 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1283 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1284 1285 /* add the k-th row into il and jl */ 1286 if (nzk > 1){ 1287 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1288 jl[k] = jl[i]; jl[i] = k; 1289 il[k] = ui[k] + 1; 1290 } 1291 uj_ptr[k] = current_space->array; 1292 uj_lvl_ptr[k] = current_space_lvl->array; 1293 1294 current_space->array += nzk; 1295 current_space->local_used += nzk; 1296 current_space->local_remaining -= nzk; 1297 1298 current_space_lvl->array += nzk; 1299 current_space_lvl->local_used += nzk; 1300 current_space_lvl->local_remaining -= nzk; 1301 1302 ui[k+1] = ui[k] + nzk; 1303 } 1304 1305 #if defined(PETSC_USE_DEBUG) 1306 if (ai[am] != 0) { 1307 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1308 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1309 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1310 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1311 } else { 1312 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1313 } 1314 #endif 1315 1316 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1317 ierr = PetscFree(jl);CHKERRQ(ierr); 1318 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1319 1320 /* destroy list of free space and other temporary array(s) */ 1321 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1322 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1323 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1324 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1325 1326 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1327 1328 /* put together the new matrix in MATSEQSBAIJ format */ 1329 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1330 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1331 B = *fact; 1332 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1333 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1334 1335 b = (Mat_SeqSBAIJ*)B->data; 1336 b->singlemalloc = PETSC_FALSE; 1337 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1338 b->j = uj; 1339 b->i = ui; 1340 b->diag = 0; 1341 b->ilen = 0; 1342 b->imax = 0; 1343 b->row = perm; 1344 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1345 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1346 b->icol = perm; 1347 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1348 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1349 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1350 b->maxnz = b->nz = ui[am]; 1351 1352 B->factor = FACTOR_CHOLESKY; 1353 B->info.factor_mallocs = reallocs; 1354 B->info.fill_ratio_given = fill; 1355 if (ai[am] != 0) { 1356 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1357 } else { 1358 B->info.fill_ratio_needed = 0.0; 1359 } 1360 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1361 if (perm_identity){ 1362 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1363 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1364 } 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1370 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1371 { 1372 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1373 Mat_SeqSBAIJ *b; 1374 Mat B; 1375 PetscErrorCode ierr; 1376 PetscTruth perm_identity; 1377 PetscReal fill = info->fill; 1378 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1379 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1380 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1381 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1382 PetscBT lnkbt; 1383 IS iperm; 1384 1385 PetscFunctionBegin; 1386 /* check whether perm is the identity mapping */ 1387 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1388 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1389 1390 if (!perm_identity){ 1391 /* check if perm is symmetric! */ 1392 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1393 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1394 for (i=0; i<am; i++) { 1395 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1396 } 1397 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1398 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1399 } 1400 1401 /* initialization */ 1402 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1403 ui[0] = 0; 1404 1405 /* jl: linked list for storing indices of the pivot rows 1406 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1407 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1408 il = jl + am; 1409 cols = il + am; 1410 ui_ptr = (PetscInt**)(cols + am); 1411 for (i=0; i<am; i++){ 1412 jl[i] = am; il[i] = 0; 1413 } 1414 1415 /* create and initialize a linked list for storing column indices of the active row k */ 1416 nlnk = am + 1; 1417 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1418 1419 /* initial FreeSpace size is fill*(ai[am]+1) */ 1420 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1421 current_space = free_space; 1422 1423 for (k=0; k<am; k++){ /* for each active row k */ 1424 /* initialize lnk by the column indices of row rip[k] of A */ 1425 nzk = 0; 1426 ncols = ai[rip[k]+1] - ai[rip[k]]; 1427 ncols_upper = 0; 1428 for (j=0; j<ncols; j++){ 1429 i = rip[*(aj + ai[rip[k]] + j)]; 1430 if (i >= k){ /* only take upper triangular entry */ 1431 cols[ncols_upper] = i; 1432 ncols_upper++; 1433 } 1434 } 1435 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1436 nzk += nlnk; 1437 1438 /* update lnk by computing fill-in for each pivot row to be merged in */ 1439 prow = jl[k]; /* 1st pivot row */ 1440 1441 while (prow < k){ 1442 nextprow = jl[prow]; 1443 /* merge prow into k-th row */ 1444 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1445 jmax = ui[prow+1]; 1446 ncols = jmax-jmin; 1447 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1448 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1449 nzk += nlnk; 1450 1451 /* update il and jl for prow */ 1452 if (jmin < jmax){ 1453 il[prow] = jmin; 1454 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1455 } 1456 prow = nextprow; 1457 } 1458 1459 /* if free space is not available, make more free space */ 1460 if (current_space->local_remaining<nzk) { 1461 i = am - k + 1; /* num of unfactored rows */ 1462 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1463 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1464 reallocs++; 1465 } 1466 1467 /* copy data into free space, then initialize lnk */ 1468 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1469 1470 /* add the k-th row into il and jl */ 1471 if (nzk-1 > 0){ 1472 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1473 jl[k] = jl[i]; jl[i] = k; 1474 il[k] = ui[k] + 1; 1475 } 1476 ui_ptr[k] = current_space->array; 1477 current_space->array += nzk; 1478 current_space->local_used += nzk; 1479 current_space->local_remaining -= nzk; 1480 1481 ui[k+1] = ui[k] + nzk; 1482 } 1483 1484 #if defined(PETSC_USE_DEBUG) 1485 if (ai[am] != 0) { 1486 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1487 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1488 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1489 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1490 } else { 1491 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1492 } 1493 #endif 1494 1495 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1496 ierr = PetscFree(jl);CHKERRQ(ierr); 1497 1498 /* destroy list of free space and other temporary array(s) */ 1499 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1500 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1501 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1502 1503 /* put together the new matrix in MATSEQSBAIJ format */ 1504 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1505 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1506 B = *fact; 1507 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1508 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1509 1510 b = (Mat_SeqSBAIJ*)B->data; 1511 b->singlemalloc = PETSC_FALSE; 1512 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1513 b->j = uj; 1514 b->i = ui; 1515 b->diag = 0; 1516 b->ilen = 0; 1517 b->imax = 0; 1518 b->row = perm; 1519 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1520 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1521 b->icol = perm; 1522 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1523 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1524 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1525 b->maxnz = b->nz = ui[am]; 1526 1527 B->factor = FACTOR_CHOLESKY; 1528 B->info.factor_mallocs = reallocs; 1529 B->info.fill_ratio_given = fill; 1530 if (ai[am] != 0) { 1531 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1532 } else { 1533 B->info.fill_ratio_needed = 0.0; 1534 } 1535 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1536 if (perm_identity){ 1537 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1538 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1539 } 1540 PetscFunctionReturn(0); 1541 } 1542