1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 23 24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 28 #endif 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 32 /* ------------------------------------------------------------ 33 34 This interface was contribed by Tony Caola 35 36 This routine is an interface to the pivoting drop-tolerance 37 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 38 SPARSEKIT2. 39 40 The SPARSEKIT2 routines used here are covered by the GNU 41 copyright; see the file gnu in this directory. 42 43 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 44 help in getting this routine ironed out. 45 46 The major drawback to this routine is that if info->fill is 47 not large enough it fails rather than allocating more space; 48 this can be fixed by hacking/improving the f2c version of 49 Yousef Saad's code. 50 51 ------------------------------------------------------------ 52 */ 53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 54 { 55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 56 PetscFunctionBegin; 57 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 58 You can obtain the drop tolerance routines by installing PETSc from\n\ 59 www.mcs.anl.gov/petsc\n"); 60 #else 61 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 62 IS iscolf,isicol,isirow; 63 PetscTruth reorder; 64 PetscErrorCode ierr,sierr; 65 PetscInt *c,*r,*ic,i,n = A->m; 66 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 67 PetscInt *ordcol,*iwk,*iperm,*jw; 68 PetscInt jmax,lfill,job,*o_i,*o_j; 69 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 70 PetscReal af; 71 72 PetscFunctionBegin; 73 74 if (info->dt == PETSC_DEFAULT) info->dt = .005; 75 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 76 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 77 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 78 lfill = (PetscInt)(info->dtcount/2.0); 79 jmax = (PetscInt)(info->fill*a->nz); 80 81 82 /* ------------------------------------------------------------ 83 If reorder=.TRUE., then the original matrix has to be 84 reordered to reflect the user selected ordering scheme, and 85 then de-reordered so it is in it's original format. 86 Because Saad's dperm() is NOT in place, we have to copy 87 the original matrix and allocate more storage. . . 88 ------------------------------------------------------------ 89 */ 90 91 /* set reorder to true if either isrow or iscol is not identity */ 92 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 93 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 94 reorder = PetscNot(reorder); 95 96 97 /* storage for ilu factor */ 98 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 100 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 101 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 102 103 /* ------------------------------------------------------------ 104 Make sure that everything is Fortran formatted (1-Based) 105 ------------------------------------------------------------ 106 */ 107 for (i=old_i[0];i<old_i[n];i++) { 108 old_j[i]++; 109 } 110 for(i=0;i<n+1;i++) { 111 old_i[i]++; 112 }; 113 114 115 if (reorder) { 116 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 117 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 r[i] = r[i]+1; 120 c[i] = c[i]+1; 121 } 122 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 123 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 124 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 125 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 126 for (i=0;i<n;i++) { 127 r[i] = r[i]-1; 128 c[i] = c[i]-1; 129 } 130 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 131 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 132 o_a = old_a2; 133 o_j = old_j2; 134 o_i = old_i2; 135 } else { 136 o_a = old_a; 137 o_j = old_j; 138 o_i = old_i; 139 } 140 141 /* ------------------------------------------------------------ 142 Call Saad's ilutp() routine to generate the factorization 143 ------------------------------------------------------------ 144 */ 145 146 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 147 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 148 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 149 150 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 151 if (sierr) { 152 switch (sierr) { 153 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 154 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 155 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 156 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 157 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 158 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 159 } 160 } 161 162 ierr = PetscFree(w);CHKERRQ(ierr); 163 ierr = PetscFree(jw);CHKERRQ(ierr); 164 165 /* ------------------------------------------------------------ 166 Saad's routine gives the result in Modified Sparse Row (msr) 167 Convert to Compressed Sparse Row format (csr) 168 ------------------------------------------------------------ 169 */ 170 171 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 172 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 173 174 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 175 176 ierr = PetscFree(iwk);CHKERRQ(ierr); 177 ierr = PetscFree(wk);CHKERRQ(ierr); 178 179 if (reorder) { 180 ierr = PetscFree(old_a2);CHKERRQ(ierr); 181 ierr = PetscFree(old_j2);CHKERRQ(ierr); 182 ierr = PetscFree(old_i2);CHKERRQ(ierr); 183 } else { 184 /* fix permutation of old_j that the factorization introduced */ 185 for (i=old_i[0]; i<old_i[n]; i++) { 186 old_j[i-1] = iperm[old_j[i-1]-1]; 187 } 188 } 189 190 /* get rid of the shift to indices starting at 1 */ 191 for (i=0; i<n+1; i++) { 192 old_i[i]--; 193 } 194 for (i=old_i[0];i<old_i[n];i++) { 195 old_j[i]--; 196 } 197 198 /* Make the factored matrix 0-based */ 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 206 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 207 /*-- permute the right-hand-side and solution vectors --*/ 208 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 209 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 210 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 211 for(i=0; i<n; i++) { 212 ordcol[i] = ic[iperm[i]-1]; 213 }; 214 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 215 ierr = ISDestroy(isicol);CHKERRQ(ierr); 216 217 ierr = PetscFree(iperm);CHKERRQ(ierr); 218 219 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 220 ierr = PetscFree(ordcol);CHKERRQ(ierr); 221 222 /*----- put together the new matrix -----*/ 223 224 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 225 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 226 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 227 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 228 (*fact)->factor = FACTOR_LU; 229 (*fact)->assembled = PETSC_TRUE; 230 231 b = (Mat_SeqAIJ*)(*fact)->data; 232 b->freedata = PETSC_TRUE; 233 b->sorted = PETSC_FALSE; 234 b->singlemalloc = PETSC_FALSE; 235 b->a = new_a; 236 b->j = new_j; 237 b->i = new_i; 238 b->ilen = 0; 239 b->imax = 0; 240 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 241 b->row = isirow; 242 b->col = iscolf; 243 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 244 b->maxnz = b->nz = new_i[n]; 245 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 246 (*fact)->info.factor_mallocs = 0; 247 248 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 249 250 af = ((double)b->nz)/((double)a->nz) + .001; 251 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr); 252 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 253 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 254 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 255 256 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 257 258 PetscFunctionReturn(0); 259 #endif 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 265 { 266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 267 IS isicol; 268 PetscErrorCode ierr; 269 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 270 PetscInt *bi,*bj,*ajtmp; 271 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 272 PetscReal f; 273 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 274 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 275 PetscBT lnkbt; 276 277 PetscFunctionBegin; 278 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 279 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 280 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 281 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 282 283 /* get new row pointers */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 285 bi[0] = 0; 286 287 /* bdiag is location of diagonal in factor */ 288 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 289 bdiag[0] = 0; 290 291 /* linked list for storing column indices of the active row */ 292 nlnk = n + 1; 293 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 294 295 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 296 im = cols + n; 297 bi_ptr = (PetscInt**)(im + n); 298 299 /* initial FreeSpace size is f*(ai[n]+1) */ 300 f = info->fill; 301 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 302 current_space = free_space; 303 304 for (i=0; i<n; i++) { 305 /* copy previous fill into linked list */ 306 nzi = 0; 307 nnz = ai[r[i]+1] - ai[r[i]]; 308 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 309 ajtmp = aj + ai[r[i]]; 310 for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */ 311 ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 312 nzi += nlnk; 313 314 /* add pivot rows into linked list */ 315 row = lnk[n]; 316 while (row < i) { 317 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 318 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 319 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 320 nzi += nlnk; 321 row = lnk[row]; 322 } 323 bi[i+1] = bi[i] + nzi; 324 im[i] = nzi; 325 326 /* mark bdiag */ 327 nzbd = 0; 328 nnz = nzi; 329 k = lnk[n]; 330 while (nnz-- && k < i){ 331 nzbd++; 332 k = lnk[k]; 333 } 334 bdiag[i] = bi[i] + nzbd; 335 336 /* if free space is not available, make more free space */ 337 if (current_space->local_remaining<nzi) { 338 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 339 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 340 reallocs++; 341 } 342 343 /* copy data into free space, then initialize lnk */ 344 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 345 bi_ptr[i] = current_space->array; 346 current_space->array += nzi; 347 current_space->local_used += nzi; 348 current_space->local_remaining -= nzi; 349 } 350 #if defined(PETSC_USE_DEBUG) 351 if (ai[n] != 0) { 352 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 353 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 354 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %G or use \n",af));CHKERRQ(ierr); 355 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 356 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 357 } else { 358 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr); 359 } 360 #endif 361 362 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 363 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 364 365 /* destroy list of free space and other temporary array(s) */ 366 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 367 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 368 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 369 ierr = PetscFree(cols);CHKERRQ(ierr); 370 371 /* put together the new matrix */ 372 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 373 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 374 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 375 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 376 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 377 b = (Mat_SeqAIJ*)(*B)->data; 378 b->freedata = PETSC_TRUE; 379 b->singlemalloc = PETSC_FALSE; 380 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 381 b->j = bj; 382 b->i = bi; 383 b->diag = bdiag; 384 b->ilen = 0; 385 b->imax = 0; 386 b->row = isrow; 387 b->col = iscol; 388 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 389 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 390 b->icol = isicol; 391 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 392 393 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 394 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 395 b->maxnz = b->nz = bi[n] ; 396 397 (*B)->factor = FACTOR_LU; 398 (*B)->info.factor_mallocs = reallocs; 399 (*B)->info.fill_ratio_given = f; 400 401 if (ai[n] != 0) { 402 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 403 } else { 404 (*B)->info.fill_ratio_needed = 0.0; 405 } 406 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 407 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 408 PetscFunctionReturn(0); 409 } 410 411 /* ----------------------------------------------------------- */ 412 #undef __FUNCT__ 413 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 414 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 415 { 416 Mat C=*B; 417 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 418 IS isrow = b->row,isicol = b->icol; 419 PetscErrorCode ierr; 420 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 421 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 422 PetscInt *diag_offset = b->diag,diag,*pj; 423 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 424 PetscScalar d; 425 PetscReal rs; 426 LUShift_Ctx sctx; 427 PetscInt newshift; 428 429 PetscFunctionBegin; 430 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 431 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 432 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 433 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 434 rtmps = rtmp; ics = ic; 435 436 if (!a->diag) { 437 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 438 } 439 /* if both shift schemes are chosen by user, only use info->shiftpd */ 440 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 441 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 442 PetscInt *aai = a->i,*ddiag = a->diag; 443 sctx.shift_top = 0; 444 for (i=0; i<n; i++) { 445 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 446 d = (a->a)[ddiag[i]]; 447 rs = -PetscAbsScalar(d) - PetscRealPart(d); 448 v = a->a+aai[i]; 449 nz = aai[i+1] - aai[i]; 450 for (j=0; j<nz; j++) 451 rs += PetscAbsScalar(v[j]); 452 if (rs>sctx.shift_top) sctx.shift_top = rs; 453 } 454 if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12; 455 sctx.shift_top *= 1.1; 456 sctx.nshift_max = 5; 457 sctx.shift_lo = 0.; 458 sctx.shift_hi = 1.; 459 } 460 461 sctx.shift_amount = 0; 462 sctx.nshift = 0; 463 do { 464 sctx.lushift = PETSC_FALSE; 465 for (i=0; i<n; i++){ 466 nz = bi[i+1] - bi[i]; 467 bjtmp = bj + bi[i]; 468 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 469 470 /* load in initial (unfactored row) */ 471 nz = a->i[r[i]+1] - a->i[r[i]]; 472 ajtmp = a->j + a->i[r[i]]; 473 v = a->a + a->i[r[i]]; 474 for (j=0; j<nz; j++) { 475 rtmp[ics[ajtmp[j]]] = v[j]; 476 } 477 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 478 479 row = *bjtmp++; 480 while (row < i) { 481 pc = rtmp + row; 482 if (*pc != 0.0) { 483 pv = b->a + diag_offset[row]; 484 pj = b->j + diag_offset[row] + 1; 485 multiplier = *pc / *pv++; 486 *pc = multiplier; 487 nz = bi[row+1] - diag_offset[row] - 1; 488 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 489 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 490 } 491 row = *bjtmp++; 492 } 493 /* finished row so stick it into b->a */ 494 pv = b->a + bi[i] ; 495 pj = b->j + bi[i] ; 496 nz = bi[i+1] - bi[i]; 497 diag = diag_offset[i] - bi[i]; 498 rs = 0.0; 499 for (j=0; j<nz; j++) { 500 pv[j] = rtmps[pj[j]]; 501 if (j != diag) rs += PetscAbsScalar(pv[j]); 502 } 503 504 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 505 sctx.rs = rs; 506 sctx.pv = pv[diag]; 507 ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 508 if (newshift == 1){ 509 break; /* sctx.shift_amount is updated */ 510 } else if (newshift == -1){ 511 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 512 } 513 } 514 515 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 516 /* 517 * if no shift in this attempt & shifting & started shifting & can refine, 518 * then try lower shift 519 */ 520 sctx.shift_hi = info->shift_fraction; 521 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 522 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 523 sctx.lushift = PETSC_TRUE; 524 sctx.nshift++; 525 } 526 } while (sctx.lushift); 527 528 /* invert diagonal entries for simplier triangular solves */ 529 for (i=0; i<n; i++) { 530 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 531 } 532 533 ierr = PetscFree(rtmp);CHKERRQ(ierr); 534 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 535 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 536 C->factor = FACTOR_LU; 537 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 538 C->assembled = PETSC_TRUE; 539 ierr = PetscLogFlops(C->n);CHKERRQ(ierr); 540 if (sctx.nshift){ 541 if (info->shiftnz) { 542 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 543 } else if (info->shiftpd) { 544 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr); 545 } 546 } 547 PetscFunctionReturn(0); 548 } 549 550 #undef __FUNCT__ 551 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 552 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 553 { 554 PetscFunctionBegin; 555 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 556 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 557 PetscFunctionReturn(0); 558 } 559 560 561 /* ----------------------------------------------------------- */ 562 #undef __FUNCT__ 563 #define __FUNCT__ "MatLUFactor_SeqAIJ" 564 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 565 { 566 PetscErrorCode ierr; 567 Mat C; 568 569 PetscFunctionBegin; 570 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 571 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 572 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 573 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 574 PetscFunctionReturn(0); 575 } 576 /* ----------------------------------------------------------- */ 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatSolve_SeqAIJ" 579 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 580 { 581 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 582 IS iscol = a->col,isrow = a->row; 583 PetscErrorCode ierr; 584 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 585 PetscInt nz,*rout,*cout; 586 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 587 588 PetscFunctionBegin; 589 if (!n) PetscFunctionReturn(0); 590 591 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 592 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 593 tmp = a->solve_work; 594 595 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 596 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 597 598 /* forward solve the lower triangular */ 599 tmp[0] = b[*r++]; 600 tmps = tmp; 601 for (i=1; i<n; i++) { 602 v = aa + ai[i] ; 603 vi = aj + ai[i] ; 604 nz = a->diag[i] - ai[i]; 605 sum = b[*r++]; 606 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 607 tmp[i] = sum; 608 } 609 610 /* backward solve the upper triangular */ 611 for (i=n-1; i>=0; i--){ 612 v = aa + a->diag[i] + 1; 613 vi = aj + a->diag[i] + 1; 614 nz = ai[i+1] - a->diag[i] - 1; 615 sum = tmp[i]; 616 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 617 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 618 } 619 620 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 621 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 622 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 623 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 624 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 625 PetscFunctionReturn(0); 626 } 627 628 /* ----------------------------------------------------------- */ 629 #undef __FUNCT__ 630 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 631 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 632 { 633 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 634 PetscErrorCode ierr; 635 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 636 PetscScalar *x,*b,*aa = a->a; 637 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 638 PetscInt adiag_i,i,*vi,nz,ai_i; 639 PetscScalar *v,sum; 640 #endif 641 642 PetscFunctionBegin; 643 if (!n) PetscFunctionReturn(0); 644 645 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 646 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 647 648 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 649 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 650 #else 651 /* forward solve the lower triangular */ 652 x[0] = b[0]; 653 for (i=1; i<n; i++) { 654 ai_i = ai[i]; 655 v = aa + ai_i; 656 vi = aj + ai_i; 657 nz = adiag[i] - ai_i; 658 sum = b[i]; 659 while (nz--) sum -= *v++ * x[*vi++]; 660 x[i] = sum; 661 } 662 663 /* backward solve the upper triangular */ 664 for (i=n-1; i>=0; i--){ 665 adiag_i = adiag[i]; 666 v = aa + adiag_i + 1; 667 vi = aj + adiag_i + 1; 668 nz = ai[i+1] - adiag_i - 1; 669 sum = x[i]; 670 while (nz--) sum -= *v++ * x[*vi++]; 671 x[i] = sum*aa[adiag_i]; 672 } 673 #endif 674 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 675 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 676 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 680 #undef __FUNCT__ 681 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 682 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 683 { 684 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 685 IS iscol = a->col,isrow = a->row; 686 PetscErrorCode ierr; 687 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 688 PetscInt nz,*rout,*cout; 689 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 690 691 PetscFunctionBegin; 692 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 693 694 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 695 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 696 tmp = a->solve_work; 697 698 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 699 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 700 701 /* forward solve the lower triangular */ 702 tmp[0] = b[*r++]; 703 for (i=1; i<n; i++) { 704 v = aa + ai[i] ; 705 vi = aj + ai[i] ; 706 nz = a->diag[i] - ai[i]; 707 sum = b[*r++]; 708 while (nz--) sum -= *v++ * tmp[*vi++ ]; 709 tmp[i] = sum; 710 } 711 712 /* backward solve the upper triangular */ 713 for (i=n-1; i>=0; i--){ 714 v = aa + a->diag[i] + 1; 715 vi = aj + a->diag[i] + 1; 716 nz = ai[i+1] - a->diag[i] - 1; 717 sum = tmp[i]; 718 while (nz--) sum -= *v++ * tmp[*vi++ ]; 719 tmp[i] = sum*aa[a->diag[i]]; 720 x[*c--] += tmp[i]; 721 } 722 723 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 724 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 725 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 726 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 727 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 728 729 PetscFunctionReturn(0); 730 } 731 /* -------------------------------------------------------------------*/ 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 734 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 735 { 736 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 737 IS iscol = a->col,isrow = a->row; 738 PetscErrorCode ierr; 739 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 740 PetscInt nz,*rout,*cout,*diag = a->diag; 741 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 742 743 PetscFunctionBegin; 744 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 745 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 746 tmp = a->solve_work; 747 748 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 749 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 750 751 /* copy the b into temp work space according to permutation */ 752 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 753 754 /* forward solve the U^T */ 755 for (i=0; i<n; i++) { 756 v = aa + diag[i] ; 757 vi = aj + diag[i] + 1; 758 nz = ai[i+1] - diag[i] - 1; 759 s1 = tmp[i]; 760 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 761 while (nz--) { 762 tmp[*vi++ ] -= (*v++)*s1; 763 } 764 tmp[i] = s1; 765 } 766 767 /* backward solve the L^T */ 768 for (i=n-1; i>=0; i--){ 769 v = aa + diag[i] - 1 ; 770 vi = aj + diag[i] - 1 ; 771 nz = diag[i] - ai[i]; 772 s1 = tmp[i]; 773 while (nz--) { 774 tmp[*vi-- ] -= (*v--)*s1; 775 } 776 } 777 778 /* copy tmp into x according to permutation */ 779 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 780 781 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 782 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 783 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 784 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 785 786 ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr); 787 PetscFunctionReturn(0); 788 } 789 790 #undef __FUNCT__ 791 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 792 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 793 { 794 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 795 IS iscol = a->col,isrow = a->row; 796 PetscErrorCode ierr; 797 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 798 PetscInt nz,*rout,*cout,*diag = a->diag; 799 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 800 801 PetscFunctionBegin; 802 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 803 804 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 805 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 806 tmp = a->solve_work; 807 808 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 809 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 810 811 /* copy the b into temp work space according to permutation */ 812 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 813 814 /* forward solve the U^T */ 815 for (i=0; i<n; i++) { 816 v = aa + diag[i] ; 817 vi = aj + diag[i] + 1; 818 nz = ai[i+1] - diag[i] - 1; 819 tmp[i] *= *v++; 820 while (nz--) { 821 tmp[*vi++ ] -= (*v++)*tmp[i]; 822 } 823 } 824 825 /* backward solve the L^T */ 826 for (i=n-1; i>=0; i--){ 827 v = aa + diag[i] - 1 ; 828 vi = aj + diag[i] - 1 ; 829 nz = diag[i] - ai[i]; 830 while (nz--) { 831 tmp[*vi-- ] -= (*v--)*tmp[i]; 832 } 833 } 834 835 /* copy tmp into x according to permutation */ 836 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 837 838 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 839 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 840 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 841 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 842 843 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 844 PetscFunctionReturn(0); 845 } 846 /* ----------------------------------------------------------------*/ 847 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 848 849 #undef __FUNCT__ 850 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 851 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 852 { 853 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 854 IS isicol; 855 PetscErrorCode ierr; 856 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 857 PetscInt *bi,*cols,nnz,*cols_lvl; 858 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 859 PetscInt i,levels,diagonal_fill; 860 PetscTruth col_identity,row_identity; 861 PetscReal f; 862 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 863 PetscBT lnkbt; 864 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 865 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 866 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 867 868 PetscFunctionBegin; 869 f = info->fill; 870 levels = (PetscInt)info->levels; 871 diagonal_fill = (PetscInt)info->diagonal_fill; 872 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 873 874 /* special case that simply copies fill pattern */ 875 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 876 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 877 if (!levels && row_identity && col_identity) { 878 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 879 (*fact)->factor = FACTOR_LU; 880 b = (Mat_SeqAIJ*)(*fact)->data; 881 if (!b->diag) { 882 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 883 } 884 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 885 b->row = isrow; 886 b->col = iscol; 887 b->icol = isicol; 888 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 889 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 890 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 891 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 892 PetscFunctionReturn(0); 893 } 894 895 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 896 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 897 898 /* get new row pointers */ 899 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 900 bi[0] = 0; 901 /* bdiag is location of diagonal in factor */ 902 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 903 bdiag[0] = 0; 904 905 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 906 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 907 908 /* create a linked list for storing column indices of the active row */ 909 nlnk = n + 1; 910 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 911 912 /* initial FreeSpace size is f*(ai[n]+1) */ 913 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 914 current_space = free_space; 915 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 916 current_space_lvl = free_space_lvl; 917 918 for (i=0; i<n; i++) { 919 nzi = 0; 920 /* copy current row into linked list */ 921 nnz = ai[r[i]+1] - ai[r[i]]; 922 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 923 cols = aj + ai[r[i]]; 924 lnk[i] = -1; /* marker to indicate if diagonal exists */ 925 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 926 nzi += nlnk; 927 928 /* make sure diagonal entry is included */ 929 if (diagonal_fill && lnk[i] == -1) { 930 fm = n; 931 while (lnk[fm] < i) fm = lnk[fm]; 932 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 933 lnk[fm] = i; 934 lnk_lvl[i] = 0; 935 nzi++; dcount++; 936 } 937 938 /* add pivot rows into the active row */ 939 nzbd = 0; 940 prow = lnk[n]; 941 while (prow < i) { 942 nnz = bdiag[prow]; 943 cols = bj_ptr[prow] + nnz + 1; 944 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 945 nnz = bi[prow+1] - bi[prow] - nnz - 1; 946 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 947 nzi += nlnk; 948 prow = lnk[prow]; 949 nzbd++; 950 } 951 bdiag[i] = nzbd; 952 bi[i+1] = bi[i] + nzi; 953 954 /* if free space is not available, make more free space */ 955 if (current_space->local_remaining<nzi) { 956 nnz = nzi*(n - i); /* estimated and max additional space needed */ 957 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 958 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 959 reallocs++; 960 } 961 962 /* copy data into free_space and free_space_lvl, then initialize lnk */ 963 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 964 bj_ptr[i] = current_space->array; 965 bjlvl_ptr[i] = current_space_lvl->array; 966 967 /* make sure the active row i has diagonal entry */ 968 if (*(bj_ptr[i]+bdiag[i]) != i) { 969 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 970 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 971 } 972 973 current_space->array += nzi; 974 current_space->local_used += nzi; 975 current_space->local_remaining -= nzi; 976 current_space_lvl->array += nzi; 977 current_space_lvl->local_used += nzi; 978 current_space_lvl->local_remaining -= nzi; 979 } 980 981 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 982 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 983 984 /* destroy list of free space and other temporary arrays */ 985 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 986 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 987 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 988 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 989 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 990 991 #if defined(PETSC_USE_DEBUG) 992 { 993 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 994 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 995 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 996 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr); 997 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 998 if (diagonal_fill) { 999 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr); 1000 } 1001 } 1002 #endif 1003 1004 /* put together the new matrix */ 1005 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1006 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1007 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1008 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1009 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1010 b = (Mat_SeqAIJ*)(*fact)->data; 1011 b->freedata = PETSC_TRUE; 1012 b->singlemalloc = PETSC_FALSE; 1013 len = (bi[n] )*sizeof(PetscScalar); 1014 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1015 b->j = bj; 1016 b->i = bi; 1017 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1018 b->diag = bdiag; 1019 b->ilen = 0; 1020 b->imax = 0; 1021 b->row = isrow; 1022 b->col = iscol; 1023 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1024 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1025 b->icol = isicol; 1026 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1027 /* In b structure: Free imax, ilen, old a, old j. 1028 Allocate bdiag, solve_work, new a, new j */ 1029 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1030 b->maxnz = b->nz = bi[n] ; 1031 (*fact)->factor = FACTOR_LU; 1032 (*fact)->info.factor_mallocs = reallocs; 1033 (*fact)->info.fill_ratio_given = f; 1034 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1035 1036 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1037 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1038 1039 PetscFunctionReturn(0); 1040 } 1041 1042 #include "src/mat/impls/sbaij/seq/sbaij.h" 1043 #undef __FUNCT__ 1044 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1045 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1046 { 1047 Mat C = *B; 1048 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1049 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1050 IS ip=b->row; 1051 PetscErrorCode ierr; 1052 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1053 PetscInt *ai=a->i,*aj=a->j; 1054 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1055 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1056 PetscReal zeropivot,rs,shiftnz; 1057 PetscReal shiftpd; 1058 ChShift_Ctx sctx; 1059 PetscInt newshift; 1060 1061 PetscFunctionBegin; 1062 shiftnz = info->shiftnz; 1063 shiftpd = info->shiftpd; 1064 zeropivot = info->zeropivot; 1065 1066 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1067 1068 /* initialization */ 1069 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1070 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1071 jl = il + mbs; 1072 rtmp = (MatScalar*)(jl + mbs); 1073 1074 sctx.shift_amount = 0; 1075 sctx.nshift = 0; 1076 do { 1077 sctx.chshift = PETSC_FALSE; 1078 for (i=0; i<mbs; i++) { 1079 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1080 } 1081 1082 for (k = 0; k<mbs; k++){ 1083 bval = ba + bi[k]; 1084 /* initialize k-th row by the perm[k]-th row of A */ 1085 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1086 for (j = jmin; j < jmax; j++){ 1087 col = rip[aj[j]]; 1088 if (col >= k){ /* only take upper triangular entry */ 1089 rtmp[col] = aa[j]; 1090 *bval++ = 0.0; /* for in-place factorization */ 1091 } 1092 } 1093 /* shift the diagonal of the matrix */ 1094 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1095 1096 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1097 dk = rtmp[k]; 1098 i = jl[k]; /* first row to be added to k_th row */ 1099 1100 while (i < k){ 1101 nexti = jl[i]; /* next row to be added to k_th row */ 1102 1103 /* compute multiplier, update diag(k) and U(i,k) */ 1104 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1105 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1106 dk += uikdi*ba[ili]; 1107 ba[ili] = uikdi; /* -U(i,k) */ 1108 1109 /* add multiple of row i to k-th row */ 1110 jmin = ili + 1; jmax = bi[i+1]; 1111 if (jmin < jmax){ 1112 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1113 /* update il and jl for row i */ 1114 il[i] = jmin; 1115 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1116 } 1117 i = nexti; 1118 } 1119 1120 /* shift the diagonals when zero pivot is detected */ 1121 /* compute rs=sum of abs(off-diagonal) */ 1122 rs = 0.0; 1123 jmin = bi[k]+1; 1124 nz = bi[k+1] - jmin; 1125 if (nz){ 1126 bcol = bj + jmin; 1127 while (nz--){ 1128 rs += PetscAbsScalar(rtmp[*bcol]); 1129 bcol++; 1130 } 1131 } 1132 1133 sctx.rs = rs; 1134 sctx.pv = dk; 1135 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1136 if (newshift == 1){ 1137 break; /* sctx.shift_amount is updated */ 1138 } else if (newshift == -1){ 1139 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1140 } 1141 1142 /* copy data into U(k,:) */ 1143 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1144 jmin = bi[k]+1; jmax = bi[k+1]; 1145 if (jmin < jmax) { 1146 for (j=jmin; j<jmax; j++){ 1147 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1148 } 1149 /* add the k-th row into il and jl */ 1150 il[k] = jmin; 1151 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1152 } 1153 } 1154 } while (sctx.chshift); 1155 ierr = PetscFree(il);CHKERRQ(ierr); 1156 1157 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1158 C->factor = FACTOR_CHOLESKY; 1159 C->assembled = PETSC_TRUE; 1160 C->preallocated = PETSC_TRUE; 1161 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1162 if (sctx.nshift){ 1163 if (shiftnz) { 1164 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1165 } else if (shiftpd) { 1166 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1167 } 1168 } 1169 PetscFunctionReturn(0); 1170 } 1171 1172 #undef __FUNCT__ 1173 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1174 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1175 { 1176 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1177 Mat_SeqSBAIJ *b; 1178 Mat B; 1179 PetscErrorCode ierr; 1180 PetscTruth perm_identity; 1181 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1182 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1183 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1184 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1185 PetscReal fill=info->fill,levels=info->levels; 1186 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1187 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1188 PetscBT lnkbt; 1189 1190 PetscFunctionBegin; 1191 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1192 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1193 1194 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1195 ui[0] = 0; 1196 1197 /* special case that simply copies fill pattern */ 1198 if (!levels && perm_identity) { 1199 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1200 for (i=0; i<am; i++) { 1201 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1202 } 1203 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1204 cols = uj; 1205 for (i=0; i<am; i++) { 1206 aj = a->j + a->diag[i]; 1207 ncols = ui[i+1] - ui[i]; 1208 for (j=0; j<ncols; j++) *cols++ = *aj++; 1209 } 1210 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1211 /* initialization */ 1212 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1213 1214 /* jl: linked list for storing indices of the pivot rows 1215 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1216 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1217 il = jl + am; 1218 uj_ptr = (PetscInt**)(il + am); 1219 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1220 for (i=0; i<am; i++){ 1221 jl[i] = am; il[i] = 0; 1222 } 1223 1224 /* create and initialize a linked list for storing column indices of the active row k */ 1225 nlnk = am + 1; 1226 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1227 1228 /* initial FreeSpace size is fill*(ai[am]+1) */ 1229 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1230 current_space = free_space; 1231 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1232 current_space_lvl = free_space_lvl; 1233 1234 for (k=0; k<am; k++){ /* for each active row k */ 1235 /* initialize lnk by the column indices of row rip[k] of A */ 1236 nzk = 0; 1237 ncols = ai[rip[k]+1] - ai[rip[k]]; 1238 ncols_upper = 0; 1239 for (j=0; j<ncols; j++){ 1240 i = *(aj + ai[rip[k]] + j); 1241 if (rip[i] >= k){ /* only take upper triangular entry */ 1242 ajtmp[ncols_upper] = i; 1243 ncols_upper++; 1244 } 1245 } 1246 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1247 nzk += nlnk; 1248 1249 /* update lnk by computing fill-in for each pivot row to be merged in */ 1250 prow = jl[k]; /* 1st pivot row */ 1251 1252 while (prow < k){ 1253 nextprow = jl[prow]; 1254 1255 /* merge prow into k-th row */ 1256 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1257 jmax = ui[prow+1]; 1258 ncols = jmax-jmin; 1259 i = jmin - ui[prow]; 1260 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1261 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1262 j = *(uj - 1); 1263 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1264 nzk += nlnk; 1265 1266 /* update il and jl for prow */ 1267 if (jmin < jmax){ 1268 il[prow] = jmin; 1269 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1270 } 1271 prow = nextprow; 1272 } 1273 1274 /* if free space is not available, make more free space */ 1275 if (current_space->local_remaining<nzk) { 1276 i = am - k + 1; /* num of unfactored rows */ 1277 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1278 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1279 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1280 reallocs++; 1281 } 1282 1283 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1284 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1285 1286 /* add the k-th row into il and jl */ 1287 if (nzk > 1){ 1288 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1289 jl[k] = jl[i]; jl[i] = k; 1290 il[k] = ui[k] + 1; 1291 } 1292 uj_ptr[k] = current_space->array; 1293 uj_lvl_ptr[k] = current_space_lvl->array; 1294 1295 current_space->array += nzk; 1296 current_space->local_used += nzk; 1297 current_space->local_remaining -= nzk; 1298 1299 current_space_lvl->array += nzk; 1300 current_space_lvl->local_used += nzk; 1301 current_space_lvl->local_remaining -= nzk; 1302 1303 ui[k+1] = ui[k] + nzk; 1304 } 1305 1306 #if defined(PETSC_USE_DEBUG) 1307 if (ai[am] != 0) { 1308 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1309 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1310 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1311 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1312 } else { 1313 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1314 } 1315 #endif 1316 1317 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1318 ierr = PetscFree(jl);CHKERRQ(ierr); 1319 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1320 1321 /* destroy list of free space and other temporary array(s) */ 1322 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1323 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1324 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1325 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1326 1327 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1328 1329 /* put together the new matrix in MATSEQSBAIJ format */ 1330 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1331 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1332 B = *fact; 1333 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1334 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1335 1336 b = (Mat_SeqSBAIJ*)B->data; 1337 b->singlemalloc = PETSC_FALSE; 1338 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1339 b->j = uj; 1340 b->i = ui; 1341 b->diag = 0; 1342 b->ilen = 0; 1343 b->imax = 0; 1344 b->row = perm; 1345 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1346 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1347 b->icol = perm; 1348 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1349 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1350 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1351 b->maxnz = b->nz = ui[am]; 1352 1353 B->factor = FACTOR_CHOLESKY; 1354 B->info.factor_mallocs = reallocs; 1355 B->info.fill_ratio_given = fill; 1356 if (ai[am] != 0) { 1357 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1358 } else { 1359 B->info.fill_ratio_needed = 0.0; 1360 } 1361 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1362 if (perm_identity){ 1363 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1364 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1365 } 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1371 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1372 { 1373 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1374 Mat_SeqSBAIJ *b; 1375 Mat B; 1376 PetscErrorCode ierr; 1377 PetscTruth perm_identity; 1378 PetscReal fill = info->fill; 1379 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1380 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1381 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1382 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1383 PetscBT lnkbt; 1384 IS iperm; 1385 1386 PetscFunctionBegin; 1387 /* check whether perm is the identity mapping */ 1388 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1389 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1390 1391 if (!perm_identity){ 1392 /* check if perm is symmetric! */ 1393 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1394 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1395 for (i=0; i<am; i++) { 1396 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1397 } 1398 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1399 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1400 } 1401 1402 /* initialization */ 1403 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1404 ui[0] = 0; 1405 1406 /* jl: linked list for storing indices of the pivot rows 1407 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1408 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1409 il = jl + am; 1410 cols = il + am; 1411 ui_ptr = (PetscInt**)(cols + am); 1412 for (i=0; i<am; i++){ 1413 jl[i] = am; il[i] = 0; 1414 } 1415 1416 /* create and initialize a linked list for storing column indices of the active row k */ 1417 nlnk = am + 1; 1418 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1419 1420 /* initial FreeSpace size is fill*(ai[am]+1) */ 1421 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1422 current_space = free_space; 1423 1424 for (k=0; k<am; k++){ /* for each active row k */ 1425 /* initialize lnk by the column indices of row rip[k] of A */ 1426 nzk = 0; 1427 ncols = ai[rip[k]+1] - ai[rip[k]]; 1428 ncols_upper = 0; 1429 for (j=0; j<ncols; j++){ 1430 i = rip[*(aj + ai[rip[k]] + j)]; 1431 if (i >= k){ /* only take upper triangular entry */ 1432 cols[ncols_upper] = i; 1433 ncols_upper++; 1434 } 1435 } 1436 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1437 nzk += nlnk; 1438 1439 /* update lnk by computing fill-in for each pivot row to be merged in */ 1440 prow = jl[k]; /* 1st pivot row */ 1441 1442 while (prow < k){ 1443 nextprow = jl[prow]; 1444 /* merge prow into k-th row */ 1445 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1446 jmax = ui[prow+1]; 1447 ncols = jmax-jmin; 1448 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1449 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1450 nzk += nlnk; 1451 1452 /* update il and jl for prow */ 1453 if (jmin < jmax){ 1454 il[prow] = jmin; 1455 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1456 } 1457 prow = nextprow; 1458 } 1459 1460 /* if free space is not available, make more free space */ 1461 if (current_space->local_remaining<nzk) { 1462 i = am - k + 1; /* num of unfactored rows */ 1463 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1464 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1465 reallocs++; 1466 } 1467 1468 /* copy data into free space, then initialize lnk */ 1469 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1470 1471 /* add the k-th row into il and jl */ 1472 if (nzk-1 > 0){ 1473 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1474 jl[k] = jl[i]; jl[i] = k; 1475 il[k] = ui[k] + 1; 1476 } 1477 ui_ptr[k] = current_space->array; 1478 current_space->array += nzk; 1479 current_space->local_used += nzk; 1480 current_space->local_remaining -= nzk; 1481 1482 ui[k+1] = ui[k] + nzk; 1483 } 1484 1485 #if defined(PETSC_USE_DEBUG) 1486 if (ai[am] != 0) { 1487 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1488 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1489 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1490 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1491 } else { 1492 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1493 } 1494 #endif 1495 1496 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1497 ierr = PetscFree(jl);CHKERRQ(ierr); 1498 1499 /* destroy list of free space and other temporary array(s) */ 1500 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1501 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1502 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1503 1504 /* put together the new matrix in MATSEQSBAIJ format */ 1505 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1506 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1507 B = *fact; 1508 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1509 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1510 1511 b = (Mat_SeqSBAIJ*)B->data; 1512 b->singlemalloc = PETSC_FALSE; 1513 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1514 b->j = uj; 1515 b->i = ui; 1516 b->diag = 0; 1517 b->ilen = 0; 1518 b->imax = 0; 1519 b->row = perm; 1520 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1521 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1522 b->icol = perm; 1523 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1524 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1525 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1526 b->maxnz = b->nz = ui[am]; 1527 1528 B->factor = FACTOR_CHOLESKY; 1529 B->info.factor_mallocs = reallocs; 1530 B->info.fill_ratio_given = fill; 1531 if (ai[am] != 0) { 1532 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1533 } else { 1534 B->info.fill_ratio_needed = 0.0; 1535 } 1536 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1537 if (perm_identity){ 1538 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1539 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1540 } 1541 PetscFunctionReturn(0); 1542 } 1543