1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 #include "petscbt.h" 6 #include "src/mat/utils/freespace.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 10 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 11 { 12 PetscFunctionBegin; 13 14 SETERRQ(PETSC_ERR_SUP,"Code not written"); 15 #if !defined(PETSC_USE_DEBUG) 16 PetscFunctionReturn(0); 17 #endif 18 } 19 20 21 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 22 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 26 27 #undef __FUNCT__ 28 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 29 /* ------------------------------------------------------------ 30 31 This interface was contribed by Tony Caola 32 33 This routine is an interface to the pivoting drop-tolerance 34 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 35 SPARSEKIT2. 36 37 The SPARSEKIT2 routines used here are covered by the GNU 38 copyright; see the file gnu in this directory. 39 40 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 41 help in getting this routine ironed out. 42 43 The major drawback to this routine is that if info->fill is 44 not large enough it fails rather than allocating more space; 45 this can be fixed by hacking/improving the f2c version of 46 Yousef Saad's code. 47 48 ------------------------------------------------------------ 49 */ 50 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 51 { 52 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 53 PetscFunctionBegin; 54 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 55 You can obtain the drop tolerance routines by installing PETSc from\n\ 56 www.mcs.anl.gov/petsc\n"); 57 #else 58 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 59 IS iscolf,isicol,isirow; 60 PetscTruth reorder; 61 PetscErrorCode ierr,sierr; 62 PetscInt *c,*r,*ic,i,n = A->m; 63 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 64 PetscInt *ordcol,*iwk,*iperm,*jw; 65 PetscInt jmax,lfill,job,*o_i,*o_j; 66 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 67 PetscReal af; 68 69 PetscFunctionBegin; 70 71 if (info->dt == PETSC_DEFAULT) info->dt = .005; 72 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 73 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 74 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 75 lfill = (PetscInt)(info->dtcount/2.0); 76 jmax = (PetscInt)(info->fill*a->nz); 77 78 79 /* ------------------------------------------------------------ 80 If reorder=.TRUE., then the original matrix has to be 81 reordered to reflect the user selected ordering scheme, and 82 then de-reordered so it is in it's original format. 83 Because Saad's dperm() is NOT in place, we have to copy 84 the original matrix and allocate more storage. . . 85 ------------------------------------------------------------ 86 */ 87 88 /* set reorder to true if either isrow or iscol is not identity */ 89 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 90 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 91 reorder = PetscNot(reorder); 92 93 94 /* storage for ilu factor */ 95 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 98 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 99 100 /* ------------------------------------------------------------ 101 Make sure that everything is Fortran formatted (1-Based) 102 ------------------------------------------------------------ 103 */ 104 for (i=old_i[0];i<old_i[n];i++) { 105 old_j[i]++; 106 } 107 for(i=0;i<n+1;i++) { 108 old_i[i]++; 109 }; 110 111 112 if (reorder) { 113 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 114 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 115 for(i=0;i<n;i++) { 116 r[i] = r[i]+1; 117 c[i] = c[i]+1; 118 } 119 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 122 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 123 for (i=0;i<n;i++) { 124 r[i] = r[i]-1; 125 c[i] = c[i]-1; 126 } 127 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 128 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 148 if (sierr) { 149 switch (sierr) { 150 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 152 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 155 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 222 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 223 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 224 (*fact)->factor = FACTOR_LU; 225 (*fact)->assembled = PETSC_TRUE; 226 227 b = (Mat_SeqAIJ*)(*fact)->data; 228 ierr = PetscFree(b->imax);CHKERRQ(ierr); 229 b->sorted = PETSC_FALSE; 230 b->singlemalloc = PETSC_FALSE; 231 /* the next line frees the default space generated by the MatCreate() */ 232 ierr = PetscFree(b->a);CHKERRQ(ierr); 233 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 234 b->a = new_a; 235 b->j = new_j; 236 b->i = new_i; 237 b->ilen = 0; 238 b->imax = 0; 239 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 240 b->row = isirow; 241 b->col = iscolf; 242 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 243 b->maxnz = b->nz = new_i[n]; 244 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 245 (*fact)->info.factor_mallocs = 0; 246 247 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 248 249 af = ((double)b->nz)/((double)a->nz) + .001; 250 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af); 251 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 252 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 253 PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n"); 254 255 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 #undef __FUNCT__ 262 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 263 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 264 { 265 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 266 IS isicol; 267 PetscErrorCode ierr; 268 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 269 PetscInt *bi,*bj,*ajtmp; 270 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 271 PetscReal f; 272 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 273 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 274 PetscBT lnkbt; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 284 bi[0] = 0; 285 286 /* bdiag is location of diagonal in factor */ 287 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 288 bdiag[0] = 0; 289 290 /* linked list for storing column indices of the active row */ 291 nlnk = n + 1; 292 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 293 294 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 295 im = cols + n; 296 bi_ptr = (PetscInt**)(im + n); 297 298 /* initial FreeSpace size is f*(ai[n]+1) */ 299 f = info->fill; 300 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 301 current_space = free_space; 302 303 for (i=0; i<n; i++) { 304 /* copy previous fill into linked list */ 305 nzi = 0; 306 nnz = ai[r[i]+1] - ai[r[i]]; 307 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 308 ajtmp = aj + ai[r[i]]; 309 for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */ 310 ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 311 nzi += nlnk; 312 313 /* add pivot rows into linked list */ 314 row = lnk[n]; 315 while (row < i) { 316 nzbd = bdiag[row] - bi[row] + 1; 317 ajtmp = bi_ptr[row] + nzbd; 318 nnz = im[row] - nzbd; /* num of columns with row<indices<=i */ 319 im[row] = nzbd; 320 ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr); 321 nzi += nlnk; 322 im[row] += nzbd; /* update im[row]: num of cols with index<=i */ 323 324 row = lnk[row]; 325 } 326 327 bi[i+1] = bi[i] + nzi; 328 im[i] = nzi; 329 330 /* mark bdiag */ 331 nzbd = 0; 332 nnz = nzi; 333 k = lnk[n]; 334 while (nnz-- && k < i){ 335 nzbd++; 336 k = lnk[k]; 337 } 338 bdiag[i] = bi[i] + nzbd; 339 340 /* if free space is not available, make more free space */ 341 if (current_space->local_remaining<nzi) { 342 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 343 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 344 reallocs++; 345 } 346 347 /* copy data into free space, then initialize lnk */ 348 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 349 bi_ptr[i] = current_space->array; 350 current_space->array += nzi; 351 current_space->local_used += nzi; 352 current_space->local_remaining -= nzi; 353 } 354 if (ai[n] != 0) { 355 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 356 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 357 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 358 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 359 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 360 } else { 361 PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 362 } 363 364 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 365 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 366 367 /* destroy list of free space and other temporary array(s) */ 368 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 369 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 370 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 371 ierr = PetscFree(cols);CHKERRQ(ierr); 372 373 /* put together the new matrix */ 374 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 375 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 376 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 377 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 378 b = (Mat_SeqAIJ*)(*B)->data; 379 ierr = PetscFree(b->imax);CHKERRQ(ierr); 380 b->singlemalloc = PETSC_FALSE; 381 /* the next line frees the default space generated by the Create() */ 382 ierr = PetscFree(b->a);CHKERRQ(ierr); 383 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 384 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 385 b->j = bj; 386 b->i = bi; 387 b->diag = bdiag; 388 b->ilen = 0; 389 b->imax = 0; 390 b->row = isrow; 391 b->col = iscol; 392 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 393 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 394 b->icol = isicol; 395 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 396 397 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 398 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 399 b->maxnz = b->nz = bi[n] ; 400 401 (*B)->factor = FACTOR_LU; 402 (*B)->info.factor_mallocs = reallocs; 403 (*B)->info.fill_ratio_given = f; 404 405 if (ai[n] != 0) { 406 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 407 } else { 408 (*B)->info.fill_ratio_needed = 0.0; 409 } 410 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 411 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 412 PetscFunctionReturn(0); 413 } 414 415 /* ----------------------------------------------------------- */ 416 #undef __FUNCT__ 417 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 418 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 419 { 420 Mat C=*B; 421 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 422 IS isrow = b->row,isicol = b->icol; 423 PetscErrorCode ierr; 424 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 425 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 426 PetscInt *diag_offset = b->diag,diag,*pj; 427 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 428 PetscReal rs,d; 429 PetscReal row_shift; 430 LUShift_Ctx sctx; 431 PetscInt newshift; 432 433 PetscFunctionBegin; 434 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 435 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 436 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 437 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 438 rtmps = rtmp; ics = ic; 439 440 if (!a->diag) { 441 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 442 } 443 /* if both shift schemes are chosen by user, only use info->shiftpd */ 444 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 445 if (info->shiftpd) { /* set sctx.shift_top=max{row_shift} */ 446 PetscInt *aai = a->i,*ddiag = a->diag; 447 sctx.shift_top = 0; 448 for (i=0; i<n; i++) { 449 d = PetscRealPart((a->a)[ddiag[i]]); 450 /* calculate amt of shift needed for this row */ 451 if (d<=0) { 452 row_shift = 0; 453 } else { 454 row_shift = -2*PetscAbsScalar((a->a)[ddiag[i]]); 455 } 456 v = a->a+aai[i]; 457 nz = aai[i+1] - aai[i]; 458 for (j=0; j<nz; j++) 459 row_shift += PetscAbsScalar(v[j]); 460 if (row_shift>sctx.shift_top) sctx.shift_top = row_shift; 461 } 462 if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12; 463 sctx.shift_top *= 1.1; 464 sctx.nshift_max = 5; 465 sctx.shift_lo = 0.; 466 sctx.shift_hi = 1.; 467 } 468 469 sctx.shift_amount = 0; 470 sctx.nshift = 0; 471 do { 472 sctx.lushift = PETSC_FALSE; 473 for (i=0; i<n; i++){ 474 nz = bi[i+1] - bi[i]; 475 bjtmp = bj + bi[i]; 476 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 477 478 /* load in initial (unfactored row) */ 479 nz = a->i[r[i]+1] - a->i[r[i]]; 480 ajtmp = a->j + a->i[r[i]]; 481 v = a->a + a->i[r[i]]; 482 for (j=0; j<nz; j++) { 483 rtmp[ics[ajtmp[j]]] = v[j]; 484 } 485 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 486 487 row = *bjtmp++; 488 while (row < i) { 489 pc = rtmp + row; 490 if (*pc != 0.0) { 491 pv = b->a + diag_offset[row]; 492 pj = b->j + diag_offset[row] + 1; 493 multiplier = *pc / *pv++; 494 *pc = multiplier; 495 nz = bi[row+1] - diag_offset[row] - 1; 496 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 497 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 498 } 499 row = *bjtmp++; 500 } 501 /* finished row so stick it into b->a */ 502 pv = b->a + bi[i] ; 503 pj = b->j + bi[i] ; 504 nz = bi[i+1] - bi[i]; 505 diag = diag_offset[i] - bi[i]; 506 rs = 0.0; 507 for (j=0; j<nz; j++) { 508 pv[j] = rtmps[pj[j]]; 509 if (j != diag) rs += PetscAbsScalar(pv[j]); 510 } 511 512 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 513 sctx.rs = rs; 514 sctx.pv = pv[diag]; 515 ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 516 if (newshift == 1){ 517 break; /* sctx.shift_amount is updated */ 518 } else if (newshift == -1){ 519 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 520 } 521 } 522 523 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 524 /* 525 * if no shift in this attempt & shifting & started shifting & can refine, 526 * then try lower shift 527 */ 528 sctx.shift_hi = info->shift_fraction; 529 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 530 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 531 sctx.lushift = PETSC_TRUE; 532 sctx.nshift++; 533 } 534 } while (sctx.lushift); 535 536 /* invert diagonal entries for simplier triangular solves */ 537 for (i=0; i<n; i++) { 538 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 539 } 540 541 ierr = PetscFree(rtmp);CHKERRQ(ierr); 542 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 543 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 544 C->factor = FACTOR_LU; 545 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 546 C->assembled = PETSC_TRUE; 547 ierr = PetscLogFlops(C->n);CHKERRQ(ierr); 548 if (sctx.nshift){ 549 if (info->shiftnz) { 550 PetscLogInfo(0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 551 } else if (info->shiftpd) { 552 b->lu_shift_fraction = info->shift_fraction; 553 PetscLogInfo(0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top); 554 } 555 } 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 561 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 562 { 563 PetscFunctionBegin; 564 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 565 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 566 PetscFunctionReturn(0); 567 } 568 569 570 /* ----------------------------------------------------------- */ 571 #undef __FUNCT__ 572 #define __FUNCT__ "MatLUFactor_SeqAIJ" 573 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 574 { 575 PetscErrorCode ierr; 576 Mat C; 577 578 PetscFunctionBegin; 579 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 580 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 581 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 582 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 583 PetscFunctionReturn(0); 584 } 585 /* ----------------------------------------------------------- */ 586 #undef __FUNCT__ 587 #define __FUNCT__ "MatSolve_SeqAIJ" 588 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 589 { 590 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 591 IS iscol = a->col,isrow = a->row; 592 PetscErrorCode ierr; 593 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 594 PetscInt nz,*rout,*cout; 595 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 596 597 PetscFunctionBegin; 598 if (!n) PetscFunctionReturn(0); 599 600 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 601 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 602 tmp = a->solve_work; 603 604 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 605 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 606 607 /* forward solve the lower triangular */ 608 tmp[0] = b[*r++]; 609 tmps = tmp; 610 for (i=1; i<n; i++) { 611 v = aa + ai[i] ; 612 vi = aj + ai[i] ; 613 nz = a->diag[i] - ai[i]; 614 sum = b[*r++]; 615 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 616 tmp[i] = sum; 617 } 618 619 /* backward solve the upper triangular */ 620 for (i=n-1; i>=0; i--){ 621 v = aa + a->diag[i] + 1; 622 vi = aj + a->diag[i] + 1; 623 nz = ai[i+1] - a->diag[i] - 1; 624 sum = tmp[i]; 625 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 626 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 627 } 628 629 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 630 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 631 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 632 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 633 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 634 PetscFunctionReturn(0); 635 } 636 637 /* ----------------------------------------------------------- */ 638 #undef __FUNCT__ 639 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 640 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 641 { 642 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 643 PetscErrorCode ierr; 644 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 645 PetscScalar *x,*b,*aa = a->a; 646 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 647 PetscInt adiag_i,i,*vi,nz,ai_i; 648 PetscScalar *v,sum; 649 #endif 650 651 PetscFunctionBegin; 652 if (!n) PetscFunctionReturn(0); 653 654 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 655 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 656 657 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 658 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 659 #else 660 /* forward solve the lower triangular */ 661 x[0] = b[0]; 662 for (i=1; i<n; i++) { 663 ai_i = ai[i]; 664 v = aa + ai_i; 665 vi = aj + ai_i; 666 nz = adiag[i] - ai_i; 667 sum = b[i]; 668 while (nz--) sum -= *v++ * x[*vi++]; 669 x[i] = sum; 670 } 671 672 /* backward solve the upper triangular */ 673 for (i=n-1; i>=0; i--){ 674 adiag_i = adiag[i]; 675 v = aa + adiag_i + 1; 676 vi = aj + adiag_i + 1; 677 nz = ai[i+1] - adiag_i - 1; 678 sum = x[i]; 679 while (nz--) sum -= *v++ * x[*vi++]; 680 x[i] = sum*aa[adiag_i]; 681 } 682 #endif 683 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 684 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 685 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 686 PetscFunctionReturn(0); 687 } 688 689 #undef __FUNCT__ 690 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 691 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 692 { 693 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 694 IS iscol = a->col,isrow = a->row; 695 PetscErrorCode ierr; 696 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 697 PetscInt nz,*rout,*cout; 698 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 699 700 PetscFunctionBegin; 701 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 702 703 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 704 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 705 tmp = a->solve_work; 706 707 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 708 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 709 710 /* forward solve the lower triangular */ 711 tmp[0] = b[*r++]; 712 for (i=1; i<n; i++) { 713 v = aa + ai[i] ; 714 vi = aj + ai[i] ; 715 nz = a->diag[i] - ai[i]; 716 sum = b[*r++]; 717 while (nz--) sum -= *v++ * tmp[*vi++ ]; 718 tmp[i] = sum; 719 } 720 721 /* backward solve the upper triangular */ 722 for (i=n-1; i>=0; i--){ 723 v = aa + a->diag[i] + 1; 724 vi = aj + a->diag[i] + 1; 725 nz = ai[i+1] - a->diag[i] - 1; 726 sum = tmp[i]; 727 while (nz--) sum -= *v++ * tmp[*vi++ ]; 728 tmp[i] = sum*aa[a->diag[i]]; 729 x[*c--] += tmp[i]; 730 } 731 732 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 733 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 734 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 735 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 736 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 737 738 PetscFunctionReturn(0); 739 } 740 /* -------------------------------------------------------------------*/ 741 #undef __FUNCT__ 742 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 743 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 744 { 745 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 746 IS iscol = a->col,isrow = a->row; 747 PetscErrorCode ierr; 748 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 749 PetscInt nz,*rout,*cout,*diag = a->diag; 750 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 751 752 PetscFunctionBegin; 753 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 754 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 755 tmp = a->solve_work; 756 757 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 758 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 759 760 /* copy the b into temp work space according to permutation */ 761 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 762 763 /* forward solve the U^T */ 764 for (i=0; i<n; i++) { 765 v = aa + diag[i] ; 766 vi = aj + diag[i] + 1; 767 nz = ai[i+1] - diag[i] - 1; 768 s1 = tmp[i]; 769 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 770 while (nz--) { 771 tmp[*vi++ ] -= (*v++)*s1; 772 } 773 tmp[i] = s1; 774 } 775 776 /* backward solve the L^T */ 777 for (i=n-1; i>=0; i--){ 778 v = aa + diag[i] - 1 ; 779 vi = aj + diag[i] - 1 ; 780 nz = diag[i] - ai[i]; 781 s1 = tmp[i]; 782 while (nz--) { 783 tmp[*vi-- ] -= (*v--)*s1; 784 } 785 } 786 787 /* copy tmp into x according to permutation */ 788 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 789 790 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 791 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 792 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 793 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 794 795 ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr); 796 PetscFunctionReturn(0); 797 } 798 799 #undef __FUNCT__ 800 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 801 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 802 { 803 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 804 IS iscol = a->col,isrow = a->row; 805 PetscErrorCode ierr; 806 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 807 PetscInt nz,*rout,*cout,*diag = a->diag; 808 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 809 810 PetscFunctionBegin; 811 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 812 813 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 814 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 815 tmp = a->solve_work; 816 817 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 818 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 819 820 /* copy the b into temp work space according to permutation */ 821 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 822 823 /* forward solve the U^T */ 824 for (i=0; i<n; i++) { 825 v = aa + diag[i] ; 826 vi = aj + diag[i] + 1; 827 nz = ai[i+1] - diag[i] - 1; 828 tmp[i] *= *v++; 829 while (nz--) { 830 tmp[*vi++ ] -= (*v++)*tmp[i]; 831 } 832 } 833 834 /* backward solve the L^T */ 835 for (i=n-1; i>=0; i--){ 836 v = aa + diag[i] - 1 ; 837 vi = aj + diag[i] - 1 ; 838 nz = diag[i] - ai[i]; 839 while (nz--) { 840 tmp[*vi-- ] -= (*v--)*tmp[i]; 841 } 842 } 843 844 /* copy tmp into x according to permutation */ 845 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 846 847 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 848 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 849 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 850 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 851 852 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 853 PetscFunctionReturn(0); 854 } 855 /* ----------------------------------------------------------------*/ 856 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 857 858 #undef __FUNCT__ 859 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 860 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 861 { 862 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 863 IS isicol; 864 PetscErrorCode ierr; 865 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 866 PetscInt *bi,*cols,nnz,*cols_lvl; 867 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 868 PetscInt i,levels,diagonal_fill; 869 PetscTruth col_identity,row_identity; 870 PetscReal f; 871 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 872 PetscBT lnkbt; 873 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 874 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 875 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 876 877 PetscFunctionBegin; 878 f = info->fill; 879 levels = (PetscInt)info->levels; 880 diagonal_fill = (PetscInt)info->diagonal_fill; 881 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 882 883 /* special case that simply copies fill pattern */ 884 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 885 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 886 if (!levels && row_identity && col_identity) { 887 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 888 (*fact)->factor = FACTOR_LU; 889 b = (Mat_SeqAIJ*)(*fact)->data; 890 if (!b->diag) { 891 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 892 } 893 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 894 b->row = isrow; 895 b->col = iscol; 896 b->icol = isicol; 897 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 898 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 899 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 900 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 905 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 906 907 /* get new row pointers */ 908 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 909 bi[0] = 0; 910 /* bdiag is location of diagonal in factor */ 911 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 912 bdiag[0] = 0; 913 914 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 915 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 916 917 /* create a linked list for storing column indices of the active row */ 918 nlnk = n + 1; 919 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 920 921 /* initial FreeSpace size is f*(ai[n]+1) */ 922 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 923 current_space = free_space; 924 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 925 current_space_lvl = free_space_lvl; 926 927 for (i=0; i<n; i++) { 928 nzi = 0; 929 /* copy current row into linked list */ 930 nnz = ai[r[i]+1] - ai[r[i]]; 931 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 932 cols = aj + ai[r[i]]; 933 lnk[i] = -1; /* marker to indicate if diagonal exists */ 934 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 935 nzi += nlnk; 936 937 /* make sure diagonal entry is included */ 938 if (diagonal_fill && lnk[i] == -1) { 939 fm = n; 940 while (lnk[fm] < i) fm = lnk[fm]; 941 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 942 lnk[fm] = i; 943 lnk_lvl[i] = 0; 944 nzi++; dcount++; 945 } 946 947 /* add pivot rows into the active row */ 948 nzbd = 0; 949 prow = lnk[n]; 950 while (prow < i) { 951 nnz = bdiag[prow]; 952 cols = bj_ptr[prow] + nnz + 1; 953 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 954 nnz = bi[prow+1] - bi[prow] - nnz - 1; 955 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 956 nzi += nlnk; 957 prow = lnk[prow]; 958 nzbd++; 959 } 960 bdiag[i] = nzbd; 961 bi[i+1] = bi[i] + nzi; 962 963 /* if free space is not available, make more free space */ 964 if (current_space->local_remaining<nzi) { 965 nnz = nzi*(n - i); /* estimated and max additional space needed */ 966 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 967 ierr = GetMoreSpace(nnz,¤t_space_lvl);CHKERRQ(ierr); 968 reallocs++; 969 } 970 971 /* copy data into free_space and free_space_lvl, then initialize lnk */ 972 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 973 bj_ptr[i] = current_space->array; 974 bjlvl_ptr[i] = current_space_lvl->array; 975 976 /* make sure the active row i has diagonal entry */ 977 if (*(bj_ptr[i]+bdiag[i]) != i) { 978 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 979 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 980 } 981 982 current_space->array += nzi; 983 current_space->local_used += nzi; 984 current_space->local_remaining -= nzi; 985 current_space_lvl->array += nzi; 986 current_space_lvl->local_used += nzi; 987 current_space_lvl->local_remaining -= nzi; 988 } 989 990 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 991 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 992 993 /* destroy list of free space and other temporary arrays */ 994 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 995 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 996 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 997 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 998 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 999 1000 { 1001 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1002 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af); 1003 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af); 1004 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af); 1005 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 1006 if (diagonal_fill) { 1007 PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount); 1008 } 1009 } 1010 1011 /* put together the new matrix */ 1012 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1013 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1014 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1015 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1016 b = (Mat_SeqAIJ*)(*fact)->data; 1017 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1018 b->singlemalloc = PETSC_FALSE; 1019 len = (bi[n] )*sizeof(PetscScalar); 1020 /* the next line frees the default space generated by the Create() */ 1021 ierr = PetscFree(b->a);CHKERRQ(ierr); 1022 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1023 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1024 b->j = bj; 1025 b->i = bi; 1026 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1027 b->diag = bdiag; 1028 b->ilen = 0; 1029 b->imax = 0; 1030 b->row = isrow; 1031 b->col = iscol; 1032 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1033 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1034 b->icol = isicol; 1035 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1036 /* In b structure: Free imax, ilen, old a, old j. 1037 Allocate bdiag, solve_work, new a, new j */ 1038 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1039 b->maxnz = b->nz = bi[n] ; 1040 (*fact)->factor = FACTOR_LU; 1041 (*fact)->info.factor_mallocs = reallocs; 1042 (*fact)->info.fill_ratio_given = f; 1043 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1044 1045 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1046 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1047 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #include "src/mat/impls/sbaij/seq/sbaij.h" 1052 #undef __FUNCT__ 1053 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1054 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1055 { 1056 Mat C = *B; 1057 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1058 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1059 IS ip=b->row; 1060 PetscErrorCode ierr; 1061 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1062 PetscInt *ai=a->i,*aj=a->j; 1063 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1064 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1065 PetscReal zeropivot,rs,shiftnz; 1066 PetscTruth shiftpd; 1067 ChShift_Ctx sctx; 1068 PetscInt newshift; 1069 1070 PetscFunctionBegin; 1071 shiftnz = info->shiftnz; 1072 shiftpd = info->shiftpd; 1073 zeropivot = info->zeropivot; 1074 1075 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1076 1077 /* initialization */ 1078 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1079 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1080 jl = il + mbs; 1081 rtmp = (MatScalar*)(jl + mbs); 1082 1083 sctx.shift_amount = 0; 1084 sctx.nshift = 0; 1085 do { 1086 sctx.chshift = PETSC_FALSE; 1087 for (i=0; i<mbs; i++) { 1088 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1089 } 1090 1091 for (k = 0; k<mbs; k++){ 1092 bval = ba + bi[k]; 1093 /* initialize k-th row by the perm[k]-th row of A */ 1094 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1095 for (j = jmin; j < jmax; j++){ 1096 col = rip[aj[j]]; 1097 if (col >= k){ /* only take upper triangular entry */ 1098 rtmp[col] = aa[j]; 1099 *bval++ = 0.0; /* for in-place factorization */ 1100 } 1101 } 1102 /* shift the diagonal of the matrix */ 1103 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1104 1105 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1106 dk = rtmp[k]; 1107 i = jl[k]; /* first row to be added to k_th row */ 1108 1109 while (i < k){ 1110 nexti = jl[i]; /* next row to be added to k_th row */ 1111 1112 /* compute multiplier, update diag(k) and U(i,k) */ 1113 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1114 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1115 dk += uikdi*ba[ili]; 1116 ba[ili] = uikdi; /* -U(i,k) */ 1117 1118 /* add multiple of row i to k-th row */ 1119 jmin = ili + 1; jmax = bi[i+1]; 1120 if (jmin < jmax){ 1121 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1122 /* update il and jl for row i */ 1123 il[i] = jmin; 1124 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1125 } 1126 i = nexti; 1127 } 1128 1129 /* shift the diagonals when zero pivot is detected */ 1130 /* compute rs=sum of abs(off-diagonal) */ 1131 rs = 0.0; 1132 jmin = bi[k]+1; 1133 nz = bi[k+1] - jmin; 1134 if (nz){ 1135 bcol = bj + jmin; 1136 while (nz--){ 1137 rs += PetscAbsScalar(rtmp[*bcol]); 1138 bcol++; 1139 } 1140 } 1141 1142 sctx.rs = rs; 1143 sctx.pv = dk; 1144 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1145 if (newshift == 1){ 1146 break; /* sctx.shift_amount is updated */ 1147 } else if (newshift == -1){ 1148 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1149 } 1150 1151 /* copy data into U(k,:) */ 1152 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1153 jmin = bi[k]+1; jmax = bi[k+1]; 1154 if (jmin < jmax) { 1155 for (j=jmin; j<jmax; j++){ 1156 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1157 } 1158 /* add the k-th row into il and jl */ 1159 il[k] = jmin; 1160 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1161 } 1162 } 1163 } while (sctx.chshift); 1164 ierr = PetscFree(il);CHKERRQ(ierr); 1165 1166 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1167 C->factor = FACTOR_CHOLESKY; 1168 C->assembled = PETSC_TRUE; 1169 C->preallocated = PETSC_TRUE; 1170 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1171 if (sctx.nshift){ 1172 if (shiftnz) { 1173 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1174 } else if (shiftpd) { 1175 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount); 1176 } 1177 } 1178 PetscFunctionReturn(0); 1179 } 1180 1181 #undef __FUNCT__ 1182 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1183 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1184 { 1185 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1186 Mat_SeqSBAIJ *b; 1187 Mat B; 1188 PetscErrorCode ierr; 1189 PetscTruth perm_identity; 1190 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1191 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1192 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1193 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1194 PetscReal fill=info->fill,levels=info->levels; 1195 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1196 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1197 PetscBT lnkbt; 1198 1199 PetscFunctionBegin; 1200 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1201 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1202 1203 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1204 ui[0] = 0; 1205 1206 /* special case that simply copies fill pattern */ 1207 if (!levels && perm_identity) { 1208 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1209 for (i=0; i<am; i++) { 1210 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1211 } 1212 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1213 cols = uj; 1214 for (i=0; i<am; i++) { 1215 aj = a->j + a->diag[i]; 1216 ncols = ui[i+1] - ui[i]; 1217 for (j=0; j<ncols; j++) *cols++ = *aj++; 1218 } 1219 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1220 /* initialization */ 1221 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1222 1223 /* jl: linked list for storing indices of the pivot rows 1224 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1225 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1226 il = jl + am; 1227 uj_ptr = (PetscInt**)(il + am); 1228 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1229 for (i=0; i<am; i++){ 1230 jl[i] = am; il[i] = 0; 1231 } 1232 1233 /* create and initialize a linked list for storing column indices of the active row k */ 1234 nlnk = am + 1; 1235 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1236 1237 /* initial FreeSpace size is fill*(ai[am]+1) */ 1238 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1239 current_space = free_space; 1240 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1241 current_space_lvl = free_space_lvl; 1242 1243 for (k=0; k<am; k++){ /* for each active row k */ 1244 /* initialize lnk by the column indices of row rip[k] of A */ 1245 nzk = 0; 1246 ncols = ai[rip[k]+1] - ai[rip[k]]; 1247 ncols_upper = 0; 1248 for (j=0; j<ncols; j++){ 1249 i = *(aj + ai[rip[k]] + j); 1250 if (rip[i] >= k){ /* only take upper triangular entry */ 1251 ajtmp[ncols_upper] = i; 1252 ncols_upper++; 1253 } 1254 } 1255 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1256 nzk += nlnk; 1257 1258 /* update lnk by computing fill-in for each pivot row to be merged in */ 1259 prow = jl[k]; /* 1st pivot row */ 1260 1261 while (prow < k){ 1262 nextprow = jl[prow]; 1263 1264 /* merge prow into k-th row */ 1265 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1266 jmax = ui[prow+1]; 1267 ncols = jmax-jmin; 1268 i = jmin - ui[prow]; 1269 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1270 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1271 j = *(uj - 1); 1272 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1273 nzk += nlnk; 1274 1275 /* update il and jl for prow */ 1276 if (jmin < jmax){ 1277 il[prow] = jmin; 1278 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1279 } 1280 prow = nextprow; 1281 } 1282 1283 /* if free space is not available, make more free space */ 1284 if (current_space->local_remaining<nzk) { 1285 i = am - k + 1; /* num of unfactored rows */ 1286 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1287 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1288 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1289 reallocs++; 1290 } 1291 1292 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1293 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1294 1295 /* add the k-th row into il and jl */ 1296 if (nzk > 1){ 1297 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1298 jl[k] = jl[i]; jl[i] = k; 1299 il[k] = ui[k] + 1; 1300 } 1301 uj_ptr[k] = current_space->array; 1302 uj_lvl_ptr[k] = current_space_lvl->array; 1303 1304 current_space->array += nzk; 1305 current_space->local_used += nzk; 1306 current_space->local_remaining -= nzk; 1307 1308 current_space_lvl->array += nzk; 1309 current_space_lvl->local_used += nzk; 1310 current_space_lvl->local_remaining -= nzk; 1311 1312 ui[k+1] = ui[k] + nzk; 1313 } 1314 1315 if (ai[am] != 0) { 1316 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1317 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1318 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1319 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1320 } else { 1321 PetscLogInfo(A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1322 } 1323 1324 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1325 ierr = PetscFree(jl);CHKERRQ(ierr); 1326 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1327 1328 /* destroy list of free space and other temporary array(s) */ 1329 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1330 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1331 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1332 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1333 1334 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1335 1336 /* put together the new matrix in MATSEQSBAIJ format */ 1337 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1338 B = *fact; 1339 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1340 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1341 1342 b = (Mat_SeqSBAIJ*)B->data; 1343 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1344 b->singlemalloc = PETSC_FALSE; 1345 /* the next line frees the default space generated by the Create() */ 1346 ierr = PetscFree(b->a);CHKERRQ(ierr); 1347 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1348 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1349 b->j = uj; 1350 b->i = ui; 1351 b->diag = 0; 1352 b->ilen = 0; 1353 b->imax = 0; 1354 b->row = perm; 1355 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1356 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1357 b->icol = perm; 1358 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1359 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1360 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1361 b->maxnz = b->nz = ui[am]; 1362 1363 B->factor = FACTOR_CHOLESKY; 1364 B->info.factor_mallocs = reallocs; 1365 B->info.fill_ratio_given = fill; 1366 if (ai[am] != 0) { 1367 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1368 } else { 1369 B->info.fill_ratio_needed = 0.0; 1370 } 1371 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1372 if (perm_identity){ 1373 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1374 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1375 } 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1381 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1382 { 1383 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1384 Mat_SeqSBAIJ *b; 1385 Mat B; 1386 PetscErrorCode ierr; 1387 PetscTruth perm_identity; 1388 PetscReal fill = info->fill; 1389 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1390 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1391 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1392 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1393 PetscBT lnkbt; 1394 IS iperm; 1395 1396 PetscFunctionBegin; 1397 /* check whether perm is the identity mapping */ 1398 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1399 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1400 1401 if (!perm_identity){ 1402 /* check if perm is symmetric! */ 1403 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1404 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1405 for (i=0; i<am; i++) { 1406 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1407 } 1408 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1409 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1410 } 1411 1412 /* initialization */ 1413 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1414 ui[0] = 0; 1415 1416 /* jl: linked list for storing indices of the pivot rows 1417 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1418 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1419 il = jl + am; 1420 cols = il + am; 1421 ui_ptr = (PetscInt**)(cols + am); 1422 for (i=0; i<am; i++){ 1423 jl[i] = am; il[i] = 0; 1424 } 1425 1426 /* create and initialize a linked list for storing column indices of the active row k */ 1427 nlnk = am + 1; 1428 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1429 1430 /* initial FreeSpace size is fill*(ai[am]+1) */ 1431 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1432 current_space = free_space; 1433 1434 for (k=0; k<am; k++){ /* for each active row k */ 1435 /* initialize lnk by the column indices of row rip[k] of A */ 1436 nzk = 0; 1437 ncols = ai[rip[k]+1] - ai[rip[k]]; 1438 ncols_upper = 0; 1439 for (j=0; j<ncols; j++){ 1440 i = rip[*(aj + ai[rip[k]] + j)]; 1441 if (i >= k){ /* only take upper triangular entry */ 1442 cols[ncols_upper] = i; 1443 ncols_upper++; 1444 } 1445 } 1446 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1447 nzk += nlnk; 1448 1449 /* update lnk by computing fill-in for each pivot row to be merged in */ 1450 prow = jl[k]; /* 1st pivot row */ 1451 1452 while (prow < k){ 1453 nextprow = jl[prow]; 1454 /* merge prow into k-th row */ 1455 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1456 jmax = ui[prow+1]; 1457 ncols = jmax-jmin; 1458 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1459 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1460 nzk += nlnk; 1461 1462 /* update il and jl for prow */ 1463 if (jmin < jmax){ 1464 il[prow] = jmin; 1465 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1466 } 1467 prow = nextprow; 1468 } 1469 1470 /* if free space is not available, make more free space */ 1471 if (current_space->local_remaining<nzk) { 1472 i = am - k + 1; /* num of unfactored rows */ 1473 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1474 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1475 reallocs++; 1476 } 1477 1478 /* copy data into free space, then initialize lnk */ 1479 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1480 1481 /* add the k-th row into il and jl */ 1482 if (nzk-1 > 0){ 1483 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1484 jl[k] = jl[i]; jl[i] = k; 1485 il[k] = ui[k] + 1; 1486 } 1487 ui_ptr[k] = current_space->array; 1488 current_space->array += nzk; 1489 current_space->local_used += nzk; 1490 current_space->local_remaining -= nzk; 1491 1492 ui[k+1] = ui[k] + nzk; 1493 } 1494 1495 if (ai[am] != 0) { 1496 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1497 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af); 1498 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af); 1499 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af); 1500 } else { 1501 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"); 1502 } 1503 1504 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1505 ierr = PetscFree(jl);CHKERRQ(ierr); 1506 1507 /* destroy list of free space and other temporary array(s) */ 1508 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1509 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1510 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1511 1512 /* put together the new matrix in MATSEQSBAIJ format */ 1513 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1514 B = *fact; 1515 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1516 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1517 1518 b = (Mat_SeqSBAIJ*)B->data; 1519 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1520 b->singlemalloc = PETSC_FALSE; 1521 /* the next line frees the default space generated by the Create() */ 1522 ierr = PetscFree(b->a);CHKERRQ(ierr); 1523 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1524 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1525 b->j = uj; 1526 b->i = ui; 1527 b->diag = 0; 1528 b->ilen = 0; 1529 b->imax = 0; 1530 b->row = perm; 1531 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1532 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1533 b->icol = perm; 1534 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1535 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1536 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1537 b->maxnz = b->nz = ui[am]; 1538 1539 B->factor = FACTOR_CHOLESKY; 1540 B->info.factor_mallocs = reallocs; 1541 B->info.fill_ratio_given = fill; 1542 if (ai[am] != 0) { 1543 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1544 } else { 1545 B->info.fill_ratio_needed = 0.0; 1546 } 1547 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1548 if (perm_identity){ 1549 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1550 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1551 } 1552 PetscFunctionReturn(0); 1553 } 1554