1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 #include "petscbt.h" 6 #include "src/mat/utils/freespace.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 10 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 11 { 12 PetscFunctionBegin; 13 14 SETERRQ(PETSC_ERR_SUP,"Code not written"); 15 #if !defined(PETSC_USE_DEBUG) 16 PetscFunctionReturn(0); 17 #endif 18 } 19 20 21 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 22 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 26 27 #undef __FUNCT__ 28 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 29 /* ------------------------------------------------------------ 30 31 This interface was contribed by Tony Caola 32 33 This routine is an interface to the pivoting drop-tolerance 34 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 35 SPARSEKIT2. 36 37 The SPARSEKIT2 routines used here are covered by the GNU 38 copyright; see the file gnu in this directory. 39 40 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 41 help in getting this routine ironed out. 42 43 The major drawback to this routine is that if info->fill is 44 not large enough it fails rather than allocating more space; 45 this can be fixed by hacking/improving the f2c version of 46 Yousef Saad's code. 47 48 ------------------------------------------------------------ 49 */ 50 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 51 { 52 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 53 PetscFunctionBegin; 54 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 55 You can obtain the drop tolerance routines by installing PETSc from\n\ 56 www.mcs.anl.gov/petsc\n"); 57 #else 58 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 59 IS iscolf,isicol,isirow; 60 PetscTruth reorder; 61 PetscErrorCode ierr,sierr; 62 PetscInt *c,*r,*ic,i,n = A->m; 63 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 64 PetscInt *ordcol,*iwk,*iperm,*jw; 65 PetscInt jmax,lfill,job,*o_i,*o_j; 66 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 67 PetscReal af; 68 69 PetscFunctionBegin; 70 71 if (info->dt == PETSC_DEFAULT) info->dt = .005; 72 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 73 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 74 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 75 lfill = (PetscInt)(info->dtcount/2.0); 76 jmax = (PetscInt)(info->fill*a->nz); 77 78 79 /* ------------------------------------------------------------ 80 If reorder=.TRUE., then the original matrix has to be 81 reordered to reflect the user selected ordering scheme, and 82 then de-reordered so it is in it's original format. 83 Because Saad's dperm() is NOT in place, we have to copy 84 the original matrix and allocate more storage. . . 85 ------------------------------------------------------------ 86 */ 87 88 /* set reorder to true if either isrow or iscol is not identity */ 89 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 90 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 91 reorder = PetscNot(reorder); 92 93 94 /* storage for ilu factor */ 95 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 98 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 99 100 /* ------------------------------------------------------------ 101 Make sure that everything is Fortran formatted (1-Based) 102 ------------------------------------------------------------ 103 */ 104 for (i=old_i[0];i<old_i[n];i++) { 105 old_j[i]++; 106 } 107 for(i=0;i<n+1;i++) { 108 old_i[i]++; 109 }; 110 111 112 if (reorder) { 113 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 114 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 115 for(i=0;i<n;i++) { 116 r[i] = r[i]+1; 117 c[i] = c[i]+1; 118 } 119 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 122 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 123 for (i=0;i<n;i++) { 124 r[i] = r[i]-1; 125 c[i] = c[i]-1; 126 } 127 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 128 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 148 if (sierr) { 149 switch (sierr) { 150 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 152 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 155 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 222 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 223 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 224 (*fact)->factor = FACTOR_LU; 225 (*fact)->assembled = PETSC_TRUE; 226 227 b = (Mat_SeqAIJ*)(*fact)->data; 228 ierr = PetscFree(b->imax);CHKERRQ(ierr); 229 b->sorted = PETSC_FALSE; 230 b->singlemalloc = PETSC_FALSE; 231 /* the next line frees the default space generated by the MatCreate() */ 232 ierr = PetscFree(b->a);CHKERRQ(ierr); 233 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 234 b->a = new_a; 235 b->j = new_j; 236 b->i = new_i; 237 b->ilen = 0; 238 b->imax = 0; 239 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 240 b->row = isirow; 241 b->col = iscolf; 242 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 243 b->maxnz = b->nz = new_i[n]; 244 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 245 (*fact)->info.factor_mallocs = 0; 246 247 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 248 249 af = ((double)b->nz)/((double)a->nz) + .001; 250 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr); 251 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 252 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 253 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 254 255 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 #undef __FUNCT__ 262 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 263 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 264 { 265 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 266 IS isicol; 267 PetscErrorCode ierr; 268 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 269 PetscInt *bi,*bj,*ajtmp; 270 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 271 PetscReal f; 272 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 273 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 274 PetscBT lnkbt; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 284 bi[0] = 0; 285 286 /* bdiag is location of diagonal in factor */ 287 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 288 bdiag[0] = 0; 289 290 /* linked list for storing column indices of the active row */ 291 nlnk = n + 1; 292 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 293 294 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 295 im = cols + n; 296 bi_ptr = (PetscInt**)(im + n); 297 298 /* initial FreeSpace size is f*(ai[n]+1) */ 299 f = info->fill; 300 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 301 current_space = free_space; 302 303 for (i=0; i<n; i++) { 304 /* copy previous fill into linked list */ 305 nzi = 0; 306 nnz = ai[r[i]+1] - ai[r[i]]; 307 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 308 ajtmp = aj + ai[r[i]]; 309 for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */ 310 ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 311 nzi += nlnk; 312 313 /* add pivot rows into linked list */ 314 row = lnk[n]; 315 while (row < i) { 316 nzbd = bdiag[row] - bi[row] + 1; 317 ajtmp = bi_ptr[row] + nzbd; 318 nnz = im[row] - nzbd; /* num of columns with row<indices<=i */ 319 im[row] = nzbd; 320 ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr); 321 nzi += nlnk; 322 im[row] += nzbd; /* update im[row]: num of cols with index<=i */ 323 324 row = lnk[row]; 325 } 326 327 bi[i+1] = bi[i] + nzi; 328 im[i] = nzi; 329 330 /* mark bdiag */ 331 nzbd = 0; 332 nnz = nzi; 333 k = lnk[n]; 334 while (nnz-- && k < i){ 335 nzbd++; 336 k = lnk[k]; 337 } 338 bdiag[i] = bi[i] + nzbd; 339 340 /* if free space is not available, make more free space */ 341 if (current_space->local_remaining<nzi) { 342 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 343 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 344 reallocs++; 345 } 346 347 /* copy data into free space, then initialize lnk */ 348 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 349 bi_ptr[i] = current_space->array; 350 current_space->array += nzi; 351 current_space->local_used += nzi; 352 current_space->local_remaining -= nzi; 353 } 354 #if defined(PETSC_USE_DEBUG) 355 if (ai[n] != 0) { 356 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 357 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 358 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af));CHKERRQ(ierr); 359 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 360 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 361 } else { 362 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr); 363 } 364 #endif 365 366 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 367 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 368 369 /* destroy list of free space and other temporary array(s) */ 370 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 371 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 372 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 373 ierr = PetscFree(cols);CHKERRQ(ierr); 374 375 /* put together the new matrix */ 376 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 377 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 378 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 379 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 380 b = (Mat_SeqAIJ*)(*B)->data; 381 ierr = PetscFree(b->imax);CHKERRQ(ierr); 382 b->singlemalloc = PETSC_FALSE; 383 /* the next line frees the default space generated by the Create() */ 384 ierr = PetscFree(b->a);CHKERRQ(ierr); 385 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 386 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 387 b->j = bj; 388 b->i = bi; 389 b->diag = bdiag; 390 b->ilen = 0; 391 b->imax = 0; 392 b->row = isrow; 393 b->col = iscol; 394 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 395 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 396 b->icol = isicol; 397 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 398 399 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 400 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 401 b->maxnz = b->nz = bi[n] ; 402 403 (*B)->factor = FACTOR_LU; 404 (*B)->info.factor_mallocs = reallocs; 405 (*B)->info.fill_ratio_given = f; 406 407 if (ai[n] != 0) { 408 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 409 } else { 410 (*B)->info.fill_ratio_needed = 0.0; 411 } 412 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 413 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 414 PetscFunctionReturn(0); 415 } 416 417 /* ----------------------------------------------------------- */ 418 #undef __FUNCT__ 419 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 420 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 421 { 422 Mat C=*B; 423 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 424 IS isrow = b->row,isicol = b->icol; 425 PetscErrorCode ierr; 426 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 427 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 428 PetscInt *diag_offset = b->diag,diag,*pj; 429 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 430 PetscScalar d; 431 PetscReal rs; 432 LUShift_Ctx sctx; 433 PetscInt newshift; 434 435 PetscFunctionBegin; 436 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 437 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 438 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 439 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 440 rtmps = rtmp; ics = ic; 441 442 if (!a->diag) { 443 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 444 } 445 /* if both shift schemes are chosen by user, only use info->shiftpd */ 446 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 447 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 448 PetscInt *aai = a->i,*ddiag = a->diag; 449 sctx.shift_top = 0; 450 for (i=0; i<n; i++) { 451 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 452 d = (a->a)[ddiag[i]]; 453 rs = -PetscAbsScalar(d) - PetscRealPart(d); 454 v = a->a+aai[i]; 455 nz = aai[i+1] - aai[i]; 456 for (j=0; j<nz; j++) 457 rs += PetscAbsScalar(v[j]); 458 if (rs>sctx.shift_top) sctx.shift_top = rs; 459 } 460 if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12; 461 sctx.shift_top *= 1.1; 462 sctx.nshift_max = 5; 463 sctx.shift_lo = 0.; 464 sctx.shift_hi = 1.; 465 } 466 467 sctx.shift_amount = 0; 468 sctx.nshift = 0; 469 do { 470 sctx.lushift = PETSC_FALSE; 471 for (i=0; i<n; i++){ 472 nz = bi[i+1] - bi[i]; 473 bjtmp = bj + bi[i]; 474 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 475 476 /* load in initial (unfactored row) */ 477 nz = a->i[r[i]+1] - a->i[r[i]]; 478 ajtmp = a->j + a->i[r[i]]; 479 v = a->a + a->i[r[i]]; 480 for (j=0; j<nz; j++) { 481 rtmp[ics[ajtmp[j]]] = v[j]; 482 } 483 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 484 485 row = *bjtmp++; 486 while (row < i) { 487 pc = rtmp + row; 488 if (*pc != 0.0) { 489 pv = b->a + diag_offset[row]; 490 pj = b->j + diag_offset[row] + 1; 491 multiplier = *pc / *pv++; 492 *pc = multiplier; 493 nz = bi[row+1] - diag_offset[row] - 1; 494 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 495 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 496 } 497 row = *bjtmp++; 498 } 499 /* finished row so stick it into b->a */ 500 pv = b->a + bi[i] ; 501 pj = b->j + bi[i] ; 502 nz = bi[i+1] - bi[i]; 503 diag = diag_offset[i] - bi[i]; 504 rs = 0.0; 505 for (j=0; j<nz; j++) { 506 pv[j] = rtmps[pj[j]]; 507 if (j != diag) rs += PetscAbsScalar(pv[j]); 508 } 509 510 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 511 sctx.rs = rs; 512 sctx.pv = pv[diag]; 513 ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 514 if (newshift == 1){ 515 break; /* sctx.shift_amount is updated */ 516 } else if (newshift == -1){ 517 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 518 } 519 } 520 521 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 522 /* 523 * if no shift in this attempt & shifting & started shifting & can refine, 524 * then try lower shift 525 */ 526 sctx.shift_hi = info->shift_fraction; 527 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 528 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 529 sctx.lushift = PETSC_TRUE; 530 sctx.nshift++; 531 } 532 } while (sctx.lushift); 533 534 /* invert diagonal entries for simplier triangular solves */ 535 for (i=0; i<n; i++) { 536 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 537 } 538 539 ierr = PetscFree(rtmp);CHKERRQ(ierr); 540 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 541 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 542 C->factor = FACTOR_LU; 543 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 544 C->assembled = PETSC_TRUE; 545 ierr = PetscLogFlops(C->n);CHKERRQ(ierr); 546 if (sctx.nshift){ 547 if (info->shiftnz) { 548 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 549 } else if (info->shiftpd) { 550 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr); 551 } 552 } 553 PetscFunctionReturn(0); 554 } 555 556 #undef __FUNCT__ 557 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 558 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 559 { 560 PetscFunctionBegin; 561 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 562 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 563 PetscFunctionReturn(0); 564 } 565 566 567 /* ----------------------------------------------------------- */ 568 #undef __FUNCT__ 569 #define __FUNCT__ "MatLUFactor_SeqAIJ" 570 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 571 { 572 PetscErrorCode ierr; 573 Mat C; 574 575 PetscFunctionBegin; 576 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 577 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 578 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 579 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 580 PetscFunctionReturn(0); 581 } 582 /* ----------------------------------------------------------- */ 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatSolve_SeqAIJ" 585 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 586 { 587 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 588 IS iscol = a->col,isrow = a->row; 589 PetscErrorCode ierr; 590 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 591 PetscInt nz,*rout,*cout; 592 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 593 594 PetscFunctionBegin; 595 if (!n) PetscFunctionReturn(0); 596 597 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 598 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 599 tmp = a->solve_work; 600 601 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 602 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 603 604 /* forward solve the lower triangular */ 605 tmp[0] = b[*r++]; 606 tmps = tmp; 607 for (i=1; i<n; i++) { 608 v = aa + ai[i] ; 609 vi = aj + ai[i] ; 610 nz = a->diag[i] - ai[i]; 611 sum = b[*r++]; 612 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 613 tmp[i] = sum; 614 } 615 616 /* backward solve the upper triangular */ 617 for (i=n-1; i>=0; i--){ 618 v = aa + a->diag[i] + 1; 619 vi = aj + a->diag[i] + 1; 620 nz = ai[i+1] - a->diag[i] - 1; 621 sum = tmp[i]; 622 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 623 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 624 } 625 626 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 627 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 628 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 629 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 630 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 631 PetscFunctionReturn(0); 632 } 633 634 /* ----------------------------------------------------------- */ 635 #undef __FUNCT__ 636 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 637 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 638 { 639 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 640 PetscErrorCode ierr; 641 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 642 PetscScalar *x,*b,*aa = a->a; 643 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 644 PetscInt adiag_i,i,*vi,nz,ai_i; 645 PetscScalar *v,sum; 646 #endif 647 648 PetscFunctionBegin; 649 if (!n) PetscFunctionReturn(0); 650 651 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 652 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 653 654 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 655 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 656 #else 657 /* forward solve the lower triangular */ 658 x[0] = b[0]; 659 for (i=1; i<n; i++) { 660 ai_i = ai[i]; 661 v = aa + ai_i; 662 vi = aj + ai_i; 663 nz = adiag[i] - ai_i; 664 sum = b[i]; 665 while (nz--) sum -= *v++ * x[*vi++]; 666 x[i] = sum; 667 } 668 669 /* backward solve the upper triangular */ 670 for (i=n-1; i>=0; i--){ 671 adiag_i = adiag[i]; 672 v = aa + adiag_i + 1; 673 vi = aj + adiag_i + 1; 674 nz = ai[i+1] - adiag_i - 1; 675 sum = x[i]; 676 while (nz--) sum -= *v++ * x[*vi++]; 677 x[i] = sum*aa[adiag_i]; 678 } 679 #endif 680 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 681 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 682 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 683 PetscFunctionReturn(0); 684 } 685 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 688 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 689 { 690 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 691 IS iscol = a->col,isrow = a->row; 692 PetscErrorCode ierr; 693 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 694 PetscInt nz,*rout,*cout; 695 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 696 697 PetscFunctionBegin; 698 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 699 700 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 701 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 702 tmp = a->solve_work; 703 704 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 705 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 706 707 /* forward solve the lower triangular */ 708 tmp[0] = b[*r++]; 709 for (i=1; i<n; i++) { 710 v = aa + ai[i] ; 711 vi = aj + ai[i] ; 712 nz = a->diag[i] - ai[i]; 713 sum = b[*r++]; 714 while (nz--) sum -= *v++ * tmp[*vi++ ]; 715 tmp[i] = sum; 716 } 717 718 /* backward solve the upper triangular */ 719 for (i=n-1; i>=0; i--){ 720 v = aa + a->diag[i] + 1; 721 vi = aj + a->diag[i] + 1; 722 nz = ai[i+1] - a->diag[i] - 1; 723 sum = tmp[i]; 724 while (nz--) sum -= *v++ * tmp[*vi++ ]; 725 tmp[i] = sum*aa[a->diag[i]]; 726 x[*c--] += tmp[i]; 727 } 728 729 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 730 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 731 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 732 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 733 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 734 735 PetscFunctionReturn(0); 736 } 737 /* -------------------------------------------------------------------*/ 738 #undef __FUNCT__ 739 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 740 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 741 { 742 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 743 IS iscol = a->col,isrow = a->row; 744 PetscErrorCode ierr; 745 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 746 PetscInt nz,*rout,*cout,*diag = a->diag; 747 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 748 749 PetscFunctionBegin; 750 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 751 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 752 tmp = a->solve_work; 753 754 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 755 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 756 757 /* copy the b into temp work space according to permutation */ 758 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 759 760 /* forward solve the U^T */ 761 for (i=0; i<n; i++) { 762 v = aa + diag[i] ; 763 vi = aj + diag[i] + 1; 764 nz = ai[i+1] - diag[i] - 1; 765 s1 = tmp[i]; 766 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 767 while (nz--) { 768 tmp[*vi++ ] -= (*v++)*s1; 769 } 770 tmp[i] = s1; 771 } 772 773 /* backward solve the L^T */ 774 for (i=n-1; i>=0; i--){ 775 v = aa + diag[i] - 1 ; 776 vi = aj + diag[i] - 1 ; 777 nz = diag[i] - ai[i]; 778 s1 = tmp[i]; 779 while (nz--) { 780 tmp[*vi-- ] -= (*v--)*s1; 781 } 782 } 783 784 /* copy tmp into x according to permutation */ 785 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 786 787 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 788 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 789 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 790 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 791 792 ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr); 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 798 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 799 { 800 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 801 IS iscol = a->col,isrow = a->row; 802 PetscErrorCode ierr; 803 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 804 PetscInt nz,*rout,*cout,*diag = a->diag; 805 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 806 807 PetscFunctionBegin; 808 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 809 810 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 811 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 812 tmp = a->solve_work; 813 814 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 815 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 816 817 /* copy the b into temp work space according to permutation */ 818 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 819 820 /* forward solve the U^T */ 821 for (i=0; i<n; i++) { 822 v = aa + diag[i] ; 823 vi = aj + diag[i] + 1; 824 nz = ai[i+1] - diag[i] - 1; 825 tmp[i] *= *v++; 826 while (nz--) { 827 tmp[*vi++ ] -= (*v++)*tmp[i]; 828 } 829 } 830 831 /* backward solve the L^T */ 832 for (i=n-1; i>=0; i--){ 833 v = aa + diag[i] - 1 ; 834 vi = aj + diag[i] - 1 ; 835 nz = diag[i] - ai[i]; 836 while (nz--) { 837 tmp[*vi-- ] -= (*v--)*tmp[i]; 838 } 839 } 840 841 /* copy tmp into x according to permutation */ 842 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 843 844 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 845 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 846 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 847 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 848 849 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 850 PetscFunctionReturn(0); 851 } 852 /* ----------------------------------------------------------------*/ 853 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 854 855 #undef __FUNCT__ 856 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 857 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 858 { 859 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 860 IS isicol; 861 PetscErrorCode ierr; 862 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 863 PetscInt *bi,*cols,nnz,*cols_lvl; 864 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 865 PetscInt i,levels,diagonal_fill; 866 PetscTruth col_identity,row_identity; 867 PetscReal f; 868 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 869 PetscBT lnkbt; 870 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 871 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 872 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 873 874 PetscFunctionBegin; 875 f = info->fill; 876 levels = (PetscInt)info->levels; 877 diagonal_fill = (PetscInt)info->diagonal_fill; 878 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 879 880 /* special case that simply copies fill pattern */ 881 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 882 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 883 if (!levels && row_identity && col_identity) { 884 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 885 (*fact)->factor = FACTOR_LU; 886 b = (Mat_SeqAIJ*)(*fact)->data; 887 if (!b->diag) { 888 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 889 } 890 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 891 b->row = isrow; 892 b->col = iscol; 893 b->icol = isicol; 894 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 895 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 896 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 897 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 898 PetscFunctionReturn(0); 899 } 900 901 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 902 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 903 904 /* get new row pointers */ 905 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 906 bi[0] = 0; 907 /* bdiag is location of diagonal in factor */ 908 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 909 bdiag[0] = 0; 910 911 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 912 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 913 914 /* create a linked list for storing column indices of the active row */ 915 nlnk = n + 1; 916 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 917 918 /* initial FreeSpace size is f*(ai[n]+1) */ 919 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 920 current_space = free_space; 921 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 922 current_space_lvl = free_space_lvl; 923 924 for (i=0; i<n; i++) { 925 nzi = 0; 926 /* copy current row into linked list */ 927 nnz = ai[r[i]+1] - ai[r[i]]; 928 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 929 cols = aj + ai[r[i]]; 930 lnk[i] = -1; /* marker to indicate if diagonal exists */ 931 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 932 nzi += nlnk; 933 934 /* make sure diagonal entry is included */ 935 if (diagonal_fill && lnk[i] == -1) { 936 fm = n; 937 while (lnk[fm] < i) fm = lnk[fm]; 938 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 939 lnk[fm] = i; 940 lnk_lvl[i] = 0; 941 nzi++; dcount++; 942 } 943 944 /* add pivot rows into the active row */ 945 nzbd = 0; 946 prow = lnk[n]; 947 while (prow < i) { 948 nnz = bdiag[prow]; 949 cols = bj_ptr[prow] + nnz + 1; 950 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 951 nnz = bi[prow+1] - bi[prow] - nnz - 1; 952 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 953 nzi += nlnk; 954 prow = lnk[prow]; 955 nzbd++; 956 } 957 bdiag[i] = nzbd; 958 bi[i+1] = bi[i] + nzi; 959 960 /* if free space is not available, make more free space */ 961 if (current_space->local_remaining<nzi) { 962 nnz = nzi*(n - i); /* estimated and max additional space needed */ 963 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 964 ierr = GetMoreSpace(nnz,¤t_space_lvl);CHKERRQ(ierr); 965 reallocs++; 966 } 967 968 /* copy data into free_space and free_space_lvl, then initialize lnk */ 969 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 970 bj_ptr[i] = current_space->array; 971 bjlvl_ptr[i] = current_space_lvl->array; 972 973 /* make sure the active row i has diagonal entry */ 974 if (*(bj_ptr[i]+bdiag[i]) != i) { 975 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 976 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 977 } 978 979 current_space->array += nzi; 980 current_space->local_used += nzi; 981 current_space->local_remaining -= nzi; 982 current_space_lvl->array += nzi; 983 current_space_lvl->local_used += nzi; 984 current_space_lvl->local_remaining -= nzi; 985 } 986 987 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 988 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 989 990 /* destroy list of free space and other temporary arrays */ 991 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 992 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 993 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 994 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 995 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 996 997 #if defined(PETSC_USE_DEBUG) 998 { 999 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1000 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 1001 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 1002 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr); 1003 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 1004 if (diagonal_fill) { 1005 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr); 1006 } 1007 } 1008 #endif 1009 1010 /* put together the new matrix */ 1011 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1012 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1013 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1014 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1015 b = (Mat_SeqAIJ*)(*fact)->data; 1016 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1017 b->singlemalloc = PETSC_FALSE; 1018 len = (bi[n] )*sizeof(PetscScalar); 1019 /* the next line frees the default space generated by the Create() */ 1020 ierr = PetscFree(b->a);CHKERRQ(ierr); 1021 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1022 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1023 b->j = bj; 1024 b->i = bi; 1025 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1026 b->diag = bdiag; 1027 b->ilen = 0; 1028 b->imax = 0; 1029 b->row = isrow; 1030 b->col = iscol; 1031 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1032 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1033 b->icol = isicol; 1034 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1035 /* In b structure: Free imax, ilen, old a, old j. 1036 Allocate bdiag, solve_work, new a, new j */ 1037 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1038 b->maxnz = b->nz = bi[n] ; 1039 (*fact)->factor = FACTOR_LU; 1040 (*fact)->info.factor_mallocs = reallocs; 1041 (*fact)->info.fill_ratio_given = f; 1042 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1043 1044 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1045 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1046 1047 PetscFunctionReturn(0); 1048 } 1049 1050 #include "src/mat/impls/sbaij/seq/sbaij.h" 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1053 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1054 { 1055 Mat C = *B; 1056 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1057 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1058 IS ip=b->row; 1059 PetscErrorCode ierr; 1060 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1061 PetscInt *ai=a->i,*aj=a->j; 1062 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1063 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1064 PetscReal zeropivot,rs,shiftnz; 1065 PetscTruth shiftpd; 1066 ChShift_Ctx sctx; 1067 PetscInt newshift; 1068 1069 PetscFunctionBegin; 1070 shiftnz = info->shiftnz; 1071 shiftpd = info->shiftpd; 1072 zeropivot = info->zeropivot; 1073 1074 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1075 1076 /* initialization */ 1077 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1078 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1079 jl = il + mbs; 1080 rtmp = (MatScalar*)(jl + mbs); 1081 1082 sctx.shift_amount = 0; 1083 sctx.nshift = 0; 1084 do { 1085 sctx.chshift = PETSC_FALSE; 1086 for (i=0; i<mbs; i++) { 1087 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1088 } 1089 1090 for (k = 0; k<mbs; k++){ 1091 bval = ba + bi[k]; 1092 /* initialize k-th row by the perm[k]-th row of A */ 1093 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1094 for (j = jmin; j < jmax; j++){ 1095 col = rip[aj[j]]; 1096 if (col >= k){ /* only take upper triangular entry */ 1097 rtmp[col] = aa[j]; 1098 *bval++ = 0.0; /* for in-place factorization */ 1099 } 1100 } 1101 /* shift the diagonal of the matrix */ 1102 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1103 1104 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1105 dk = rtmp[k]; 1106 i = jl[k]; /* first row to be added to k_th row */ 1107 1108 while (i < k){ 1109 nexti = jl[i]; /* next row to be added to k_th row */ 1110 1111 /* compute multiplier, update diag(k) and U(i,k) */ 1112 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1113 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1114 dk += uikdi*ba[ili]; 1115 ba[ili] = uikdi; /* -U(i,k) */ 1116 1117 /* add multiple of row i to k-th row */ 1118 jmin = ili + 1; jmax = bi[i+1]; 1119 if (jmin < jmax){ 1120 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1121 /* update il and jl for row i */ 1122 il[i] = jmin; 1123 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1124 } 1125 i = nexti; 1126 } 1127 1128 /* shift the diagonals when zero pivot is detected */ 1129 /* compute rs=sum of abs(off-diagonal) */ 1130 rs = 0.0; 1131 jmin = bi[k]+1; 1132 nz = bi[k+1] - jmin; 1133 if (nz){ 1134 bcol = bj + jmin; 1135 while (nz--){ 1136 rs += PetscAbsScalar(rtmp[*bcol]); 1137 bcol++; 1138 } 1139 } 1140 1141 sctx.rs = rs; 1142 sctx.pv = dk; 1143 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1144 if (newshift == 1){ 1145 break; /* sctx.shift_amount is updated */ 1146 } else if (newshift == -1){ 1147 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1148 } 1149 1150 /* copy data into U(k,:) */ 1151 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1152 jmin = bi[k]+1; jmax = bi[k+1]; 1153 if (jmin < jmax) { 1154 for (j=jmin; j<jmax; j++){ 1155 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1156 } 1157 /* add the k-th row into il and jl */ 1158 il[k] = jmin; 1159 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1160 } 1161 } 1162 } while (sctx.chshift); 1163 ierr = PetscFree(il);CHKERRQ(ierr); 1164 1165 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1166 C->factor = FACTOR_CHOLESKY; 1167 C->assembled = PETSC_TRUE; 1168 C->preallocated = PETSC_TRUE; 1169 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1170 if (sctx.nshift){ 1171 if (shiftnz) { 1172 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1173 } else if (shiftpd) { 1174 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1175 } 1176 } 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNCT__ 1181 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1182 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1183 { 1184 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1185 Mat_SeqSBAIJ *b; 1186 Mat B; 1187 PetscErrorCode ierr; 1188 PetscTruth perm_identity; 1189 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1190 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1191 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1192 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1193 PetscReal fill=info->fill,levels=info->levels; 1194 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1195 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1196 PetscBT lnkbt; 1197 1198 PetscFunctionBegin; 1199 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1200 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1201 1202 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1203 ui[0] = 0; 1204 1205 /* special case that simply copies fill pattern */ 1206 if (!levels && perm_identity) { 1207 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1208 for (i=0; i<am; i++) { 1209 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1210 } 1211 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1212 cols = uj; 1213 for (i=0; i<am; i++) { 1214 aj = a->j + a->diag[i]; 1215 ncols = ui[i+1] - ui[i]; 1216 for (j=0; j<ncols; j++) *cols++ = *aj++; 1217 } 1218 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1219 /* initialization */ 1220 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1221 1222 /* jl: linked list for storing indices of the pivot rows 1223 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1224 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1225 il = jl + am; 1226 uj_ptr = (PetscInt**)(il + am); 1227 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1228 for (i=0; i<am; i++){ 1229 jl[i] = am; il[i] = 0; 1230 } 1231 1232 /* create and initialize a linked list for storing column indices of the active row k */ 1233 nlnk = am + 1; 1234 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1235 1236 /* initial FreeSpace size is fill*(ai[am]+1) */ 1237 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1238 current_space = free_space; 1239 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1240 current_space_lvl = free_space_lvl; 1241 1242 for (k=0; k<am; k++){ /* for each active row k */ 1243 /* initialize lnk by the column indices of row rip[k] of A */ 1244 nzk = 0; 1245 ncols = ai[rip[k]+1] - ai[rip[k]]; 1246 ncols_upper = 0; 1247 for (j=0; j<ncols; j++){ 1248 i = *(aj + ai[rip[k]] + j); 1249 if (rip[i] >= k){ /* only take upper triangular entry */ 1250 ajtmp[ncols_upper] = i; 1251 ncols_upper++; 1252 } 1253 } 1254 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1255 nzk += nlnk; 1256 1257 /* update lnk by computing fill-in for each pivot row to be merged in */ 1258 prow = jl[k]; /* 1st pivot row */ 1259 1260 while (prow < k){ 1261 nextprow = jl[prow]; 1262 1263 /* merge prow into k-th row */ 1264 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1265 jmax = ui[prow+1]; 1266 ncols = jmax-jmin; 1267 i = jmin - ui[prow]; 1268 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1269 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1270 j = *(uj - 1); 1271 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1272 nzk += nlnk; 1273 1274 /* update il and jl for prow */ 1275 if (jmin < jmax){ 1276 il[prow] = jmin; 1277 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1278 } 1279 prow = nextprow; 1280 } 1281 1282 /* if free space is not available, make more free space */ 1283 if (current_space->local_remaining<nzk) { 1284 i = am - k + 1; /* num of unfactored rows */ 1285 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1286 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1287 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1288 reallocs++; 1289 } 1290 1291 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1292 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1293 1294 /* add the k-th row into il and jl */ 1295 if (nzk > 1){ 1296 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1297 jl[k] = jl[i]; jl[i] = k; 1298 il[k] = ui[k] + 1; 1299 } 1300 uj_ptr[k] = current_space->array; 1301 uj_lvl_ptr[k] = current_space_lvl->array; 1302 1303 current_space->array += nzk; 1304 current_space->local_used += nzk; 1305 current_space->local_remaining -= nzk; 1306 1307 current_space_lvl->array += nzk; 1308 current_space_lvl->local_used += nzk; 1309 current_space_lvl->local_remaining -= nzk; 1310 1311 ui[k+1] = ui[k] + nzk; 1312 } 1313 1314 #if defined(PETSC_USE_DEBUG) 1315 if (ai[am] != 0) { 1316 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1317 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1318 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1319 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1320 } else { 1321 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1322 } 1323 #endif 1324 1325 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1326 ierr = PetscFree(jl);CHKERRQ(ierr); 1327 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1328 1329 /* destroy list of free space and other temporary array(s) */ 1330 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1331 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1332 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1333 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1334 1335 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1336 1337 /* put together the new matrix in MATSEQSBAIJ format */ 1338 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1339 B = *fact; 1340 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1341 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1342 1343 b = (Mat_SeqSBAIJ*)B->data; 1344 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1345 b->singlemalloc = PETSC_FALSE; 1346 /* the next line frees the default space generated by the Create() */ 1347 ierr = PetscFree(b->a);CHKERRQ(ierr); 1348 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1349 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1350 b->j = uj; 1351 b->i = ui; 1352 b->diag = 0; 1353 b->ilen = 0; 1354 b->imax = 0; 1355 b->row = perm; 1356 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1357 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1358 b->icol = perm; 1359 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1360 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1361 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1362 b->maxnz = b->nz = ui[am]; 1363 1364 B->factor = FACTOR_CHOLESKY; 1365 B->info.factor_mallocs = reallocs; 1366 B->info.fill_ratio_given = fill; 1367 if (ai[am] != 0) { 1368 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1369 } else { 1370 B->info.fill_ratio_needed = 0.0; 1371 } 1372 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1373 if (perm_identity){ 1374 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1375 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1376 } 1377 PetscFunctionReturn(0); 1378 } 1379 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1382 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1383 { 1384 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1385 Mat_SeqSBAIJ *b; 1386 Mat B; 1387 PetscErrorCode ierr; 1388 PetscTruth perm_identity; 1389 PetscReal fill = info->fill; 1390 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1391 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1392 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1393 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1394 PetscBT lnkbt; 1395 IS iperm; 1396 1397 PetscFunctionBegin; 1398 /* check whether perm is the identity mapping */ 1399 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1400 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1401 1402 if (!perm_identity){ 1403 /* check if perm is symmetric! */ 1404 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1405 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1406 for (i=0; i<am; i++) { 1407 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1408 } 1409 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1410 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1411 } 1412 1413 /* initialization */ 1414 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1415 ui[0] = 0; 1416 1417 /* jl: linked list for storing indices of the pivot rows 1418 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1419 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1420 il = jl + am; 1421 cols = il + am; 1422 ui_ptr = (PetscInt**)(cols + am); 1423 for (i=0; i<am; i++){ 1424 jl[i] = am; il[i] = 0; 1425 } 1426 1427 /* create and initialize a linked list for storing column indices of the active row k */ 1428 nlnk = am + 1; 1429 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1430 1431 /* initial FreeSpace size is fill*(ai[am]+1) */ 1432 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1433 current_space = free_space; 1434 1435 for (k=0; k<am; k++){ /* for each active row k */ 1436 /* initialize lnk by the column indices of row rip[k] of A */ 1437 nzk = 0; 1438 ncols = ai[rip[k]+1] - ai[rip[k]]; 1439 ncols_upper = 0; 1440 for (j=0; j<ncols; j++){ 1441 i = rip[*(aj + ai[rip[k]] + j)]; 1442 if (i >= k){ /* only take upper triangular entry */ 1443 cols[ncols_upper] = i; 1444 ncols_upper++; 1445 } 1446 } 1447 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1448 nzk += nlnk; 1449 1450 /* update lnk by computing fill-in for each pivot row to be merged in */ 1451 prow = jl[k]; /* 1st pivot row */ 1452 1453 while (prow < k){ 1454 nextprow = jl[prow]; 1455 /* merge prow into k-th row */ 1456 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1457 jmax = ui[prow+1]; 1458 ncols = jmax-jmin; 1459 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1460 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1461 nzk += nlnk; 1462 1463 /* update il and jl for prow */ 1464 if (jmin < jmax){ 1465 il[prow] = jmin; 1466 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1467 } 1468 prow = nextprow; 1469 } 1470 1471 /* if free space is not available, make more free space */ 1472 if (current_space->local_remaining<nzk) { 1473 i = am - k + 1; /* num of unfactored rows */ 1474 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1475 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1476 reallocs++; 1477 } 1478 1479 /* copy data into free space, then initialize lnk */ 1480 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1481 1482 /* add the k-th row into il and jl */ 1483 if (nzk-1 > 0){ 1484 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1485 jl[k] = jl[i]; jl[i] = k; 1486 il[k] = ui[k] + 1; 1487 } 1488 ui_ptr[k] = current_space->array; 1489 current_space->array += nzk; 1490 current_space->local_used += nzk; 1491 current_space->local_remaining -= nzk; 1492 1493 ui[k+1] = ui[k] + nzk; 1494 } 1495 1496 #if defined(PETSC_USE_DEBUG) 1497 if (ai[am] != 0) { 1498 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1499 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1500 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1501 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1502 } else { 1503 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1504 } 1505 #endif 1506 1507 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1508 ierr = PetscFree(jl);CHKERRQ(ierr); 1509 1510 /* destroy list of free space and other temporary array(s) */ 1511 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1512 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1513 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1514 1515 /* put together the new matrix in MATSEQSBAIJ format */ 1516 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1517 B = *fact; 1518 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1519 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1520 1521 b = (Mat_SeqSBAIJ*)B->data; 1522 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1523 b->singlemalloc = PETSC_FALSE; 1524 /* the next line frees the default space generated by the Create() */ 1525 ierr = PetscFree(b->a);CHKERRQ(ierr); 1526 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1527 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1528 b->j = uj; 1529 b->i = ui; 1530 b->diag = 0; 1531 b->ilen = 0; 1532 b->imax = 0; 1533 b->row = perm; 1534 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1535 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1536 b->icol = perm; 1537 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1538 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1539 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1540 b->maxnz = b->nz = ui[am]; 1541 1542 B->factor = FACTOR_CHOLESKY; 1543 B->info.factor_mallocs = reallocs; 1544 B->info.fill_ratio_given = fill; 1545 if (ai[am] != 0) { 1546 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1547 } else { 1548 B->info.fill_ratio_needed = 0.0; 1549 } 1550 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1551 if (perm_identity){ 1552 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1553 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1554 } 1555 PetscFunctionReturn(0); 1556 } 1557