1 2 #include "src/mat/impls/aij/seq/aij.h" 3 #include "src/inline/dot.h" 4 #include "src/inline/spops.h" 5 #include "petscbt.h" 6 #include "src/mat/utils/freespace.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 10 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 11 { 12 PetscFunctionBegin; 13 14 SETERRQ(PETSC_ERR_SUP,"Code not written"); 15 #if !defined(PETSC_USE_DEBUG) 16 PetscFunctionReturn(0); 17 #endif 18 } 19 20 21 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 22 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 26 27 #undef __FUNCT__ 28 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 29 /* ------------------------------------------------------------ 30 31 This interface was contribed by Tony Caola 32 33 This routine is an interface to the pivoting drop-tolerance 34 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 35 SPARSEKIT2. 36 37 The SPARSEKIT2 routines used here are covered by the GNU 38 copyright; see the file gnu in this directory. 39 40 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 41 help in getting this routine ironed out. 42 43 The major drawback to this routine is that if info->fill is 44 not large enough it fails rather than allocating more space; 45 this can be fixed by hacking/improving the f2c version of 46 Yousef Saad's code. 47 48 ------------------------------------------------------------ 49 */ 50 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 51 { 52 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 53 PetscFunctionBegin; 54 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 55 You can obtain the drop tolerance routines by installing PETSc from\n\ 56 www.mcs.anl.gov/petsc\n"); 57 #else 58 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 59 IS iscolf,isicol,isirow; 60 PetscTruth reorder; 61 PetscErrorCode ierr,sierr; 62 PetscInt *c,*r,*ic,i,n = A->m; 63 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 64 PetscInt *ordcol,*iwk,*iperm,*jw; 65 PetscInt jmax,lfill,job,*o_i,*o_j; 66 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 67 PetscReal af; 68 69 PetscFunctionBegin; 70 71 if (info->dt == PETSC_DEFAULT) info->dt = .005; 72 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 73 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 74 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 75 lfill = (PetscInt)(info->dtcount/2.0); 76 jmax = (PetscInt)(info->fill*a->nz); 77 78 79 /* ------------------------------------------------------------ 80 If reorder=.TRUE., then the original matrix has to be 81 reordered to reflect the user selected ordering scheme, and 82 then de-reordered so it is in it's original format. 83 Because Saad's dperm() is NOT in place, we have to copy 84 the original matrix and allocate more storage. . . 85 ------------------------------------------------------------ 86 */ 87 88 /* set reorder to true if either isrow or iscol is not identity */ 89 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 90 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 91 reorder = PetscNot(reorder); 92 93 94 /* storage for ilu factor */ 95 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 96 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 98 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 99 100 /* ------------------------------------------------------------ 101 Make sure that everything is Fortran formatted (1-Based) 102 ------------------------------------------------------------ 103 */ 104 for (i=old_i[0];i<old_i[n];i++) { 105 old_j[i]++; 106 } 107 for(i=0;i<n+1;i++) { 108 old_i[i]++; 109 }; 110 111 112 if (reorder) { 113 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 114 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 115 for(i=0;i<n;i++) { 116 r[i] = r[i]+1; 117 c[i] = c[i]+1; 118 } 119 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 120 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 122 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 123 for (i=0;i<n;i++) { 124 r[i] = r[i]-1; 125 c[i] = c[i]-1; 126 } 127 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 128 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 148 if (sierr) { 149 switch (sierr) { 150 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 151 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 152 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 155 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 222 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 223 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 224 (*fact)->factor = FACTOR_LU; 225 (*fact)->assembled = PETSC_TRUE; 226 227 b = (Mat_SeqAIJ*)(*fact)->data; 228 ierr = PetscFree(b->imax);CHKERRQ(ierr); 229 b->sorted = PETSC_FALSE; 230 b->singlemalloc = PETSC_FALSE; 231 /* the next line frees the default space generated by the MatCreate() */ 232 ierr = PetscFree(b->a);CHKERRQ(ierr); 233 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 234 b->a = new_a; 235 b->j = new_j; 236 b->i = new_i; 237 b->ilen = 0; 238 b->imax = 0; 239 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 240 b->row = isirow; 241 b->col = iscolf; 242 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 243 b->maxnz = b->nz = new_i[n]; 244 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 245 (*fact)->info.factor_mallocs = 0; 246 247 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 248 249 af = ((double)b->nz)/((double)a->nz) + .001; 250 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr); 251 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 252 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 253 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 254 255 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 256 257 PetscFunctionReturn(0); 258 #endif 259 } 260 261 #undef __FUNCT__ 262 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 263 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 264 { 265 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 266 IS isicol; 267 PetscErrorCode ierr; 268 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 269 PetscInt *bi,*bj,*ajtmp; 270 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 271 PetscReal f; 272 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 273 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 274 PetscBT lnkbt; 275 276 PetscFunctionBegin; 277 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 278 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 279 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 280 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 281 282 /* get new row pointers */ 283 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 284 bi[0] = 0; 285 286 /* bdiag is location of diagonal in factor */ 287 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 288 bdiag[0] = 0; 289 290 /* linked list for storing column indices of the active row */ 291 nlnk = n + 1; 292 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 293 294 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 295 im = cols + n; 296 bi_ptr = (PetscInt**)(im + n); 297 298 /* initial FreeSpace size is f*(ai[n]+1) */ 299 f = info->fill; 300 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 301 current_space = free_space; 302 303 for (i=0; i<n; i++) { 304 /* copy previous fill into linked list */ 305 nzi = 0; 306 nnz = ai[r[i]+1] - ai[r[i]]; 307 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 308 ajtmp = aj + ai[r[i]]; 309 for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */ 310 ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 311 nzi += nlnk; 312 313 /* add pivot rows into linked list */ 314 row = lnk[n]; 315 while (row < i) { 316 nzbd = bdiag[row] - bi[row] + 1; 317 ajtmp = bi_ptr[row] + nzbd; 318 nnz = im[row] - nzbd; /* num of columns with row<indices<=i */ 319 im[row] = nzbd; 320 ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr); 321 nzi += nlnk; 322 im[row] += nzbd; /* update im[row]: num of cols with index<=i */ 323 324 row = lnk[row]; 325 } 326 327 bi[i+1] = bi[i] + nzi; 328 im[i] = nzi; 329 330 /* mark bdiag */ 331 nzbd = 0; 332 nnz = nzi; 333 k = lnk[n]; 334 while (nnz-- && k < i){ 335 nzbd++; 336 k = lnk[k]; 337 } 338 bdiag[i] = bi[i] + nzbd; 339 340 /* if free space is not available, make more free space */ 341 if (current_space->local_remaining<nzi) { 342 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 343 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 344 reallocs++; 345 } 346 347 /* copy data into free space, then initialize lnk */ 348 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 349 bi_ptr[i] = current_space->array; 350 current_space->array += nzi; 351 current_space->local_used += nzi; 352 current_space->local_remaining -= nzi; 353 } 354 #if defined(PETSC_USE_DEBUG) 355 if (ai[n] != 0) { 356 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 357 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 358 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af));CHKERRQ(ierr); 359 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 360 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 361 } else { 362 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr); 363 } 364 #endif 365 366 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 367 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 368 369 /* destroy list of free space and other temporary array(s) */ 370 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 371 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 372 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 373 ierr = PetscFree(cols);CHKERRQ(ierr); 374 375 /* put together the new matrix */ 376 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 377 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 378 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 379 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 380 b = (Mat_SeqAIJ*)(*B)->data; 381 ierr = PetscFree(b->imax);CHKERRQ(ierr); 382 b->singlemalloc = PETSC_FALSE; 383 /* the next line frees the default space generated by the Create() */ 384 ierr = PetscFree(b->a);CHKERRQ(ierr); 385 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 386 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 387 b->j = bj; 388 b->i = bi; 389 b->diag = bdiag; 390 b->ilen = 0; 391 b->imax = 0; 392 b->row = isrow; 393 b->col = iscol; 394 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 395 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 396 b->icol = isicol; 397 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 398 399 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 400 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 401 b->maxnz = b->nz = bi[n] ; 402 403 (*B)->factor = FACTOR_LU; 404 (*B)->info.factor_mallocs = reallocs; 405 (*B)->info.fill_ratio_given = f; 406 407 if (ai[n] != 0) { 408 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 409 } else { 410 (*B)->info.fill_ratio_needed = 0.0; 411 } 412 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 413 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 414 PetscFunctionReturn(0); 415 } 416 417 /* ----------------------------------------------------------- */ 418 #undef __FUNCT__ 419 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 420 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 421 { 422 Mat C=*B; 423 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 424 IS isrow = b->row,isicol = b->icol; 425 PetscErrorCode ierr; 426 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 427 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 428 PetscInt *diag_offset = b->diag,diag,*pj; 429 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 430 PetscReal rs,d; 431 LUShift_Ctx sctx; 432 PetscInt newshift; 433 434 PetscFunctionBegin; 435 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 436 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 437 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 438 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 439 rtmps = rtmp; ics = ic; 440 441 if (!a->diag) { 442 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 443 } 444 /* if both shift schemes are chosen by user, only use info->shiftpd */ 445 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 446 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 447 PetscInt *aai = a->i,*ddiag = a->diag; 448 sctx.shift_top = 0; 449 for (i=0; i<n; i++) { 450 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 451 d = (a->a)[ddiag[i]]; 452 rs = -PetscAbsScalar(d) - PetscRealPart(d); 453 v = a->a+aai[i]; 454 nz = aai[i+1] - aai[i]; 455 for (j=0; j<nz; j++) 456 rs += PetscAbsScalar(v[j]); 457 if (rs>sctx.shift_top) sctx.shift_top = rs; 458 } 459 if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12; 460 sctx.shift_top *= 1.1; 461 sctx.nshift_max = 5; 462 sctx.shift_lo = 0.; 463 sctx.shift_hi = 1.; 464 } 465 466 sctx.shift_amount = 0; 467 sctx.nshift = 0; 468 do { 469 sctx.lushift = PETSC_FALSE; 470 for (i=0; i<n; i++){ 471 nz = bi[i+1] - bi[i]; 472 bjtmp = bj + bi[i]; 473 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 474 475 /* load in initial (unfactored row) */ 476 nz = a->i[r[i]+1] - a->i[r[i]]; 477 ajtmp = a->j + a->i[r[i]]; 478 v = a->a + a->i[r[i]]; 479 for (j=0; j<nz; j++) { 480 rtmp[ics[ajtmp[j]]] = v[j]; 481 } 482 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 483 484 row = *bjtmp++; 485 while (row < i) { 486 pc = rtmp + row; 487 if (*pc != 0.0) { 488 pv = b->a + diag_offset[row]; 489 pj = b->j + diag_offset[row] + 1; 490 multiplier = *pc / *pv++; 491 *pc = multiplier; 492 nz = bi[row+1] - diag_offset[row] - 1; 493 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 494 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 495 } 496 row = *bjtmp++; 497 } 498 /* finished row so stick it into b->a */ 499 pv = b->a + bi[i] ; 500 pj = b->j + bi[i] ; 501 nz = bi[i+1] - bi[i]; 502 diag = diag_offset[i] - bi[i]; 503 rs = 0.0; 504 for (j=0; j<nz; j++) { 505 pv[j] = rtmps[pj[j]]; 506 if (j != diag) rs += PetscAbsScalar(pv[j]); 507 } 508 509 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 510 sctx.rs = rs; 511 sctx.pv = pv[diag]; 512 ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 513 if (newshift == 1){ 514 break; /* sctx.shift_amount is updated */ 515 } else if (newshift == -1){ 516 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 517 } 518 } 519 520 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 521 /* 522 * if no shift in this attempt & shifting & started shifting & can refine, 523 * then try lower shift 524 */ 525 sctx.shift_hi = info->shift_fraction; 526 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 527 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 528 sctx.lushift = PETSC_TRUE; 529 sctx.nshift++; 530 } 531 } while (sctx.lushift); 532 533 /* invert diagonal entries for simplier triangular solves */ 534 for (i=0; i<n; i++) { 535 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 536 } 537 538 ierr = PetscFree(rtmp);CHKERRQ(ierr); 539 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 540 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 541 C->factor = FACTOR_LU; 542 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 543 C->assembled = PETSC_TRUE; 544 ierr = PetscLogFlops(C->n);CHKERRQ(ierr); 545 if (sctx.nshift){ 546 if (info->shiftnz) { 547 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 548 } else if (info->shiftpd) { 549 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr); 550 } 551 } 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 557 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 558 { 559 PetscFunctionBegin; 560 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 561 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 562 PetscFunctionReturn(0); 563 } 564 565 566 /* ----------------------------------------------------------- */ 567 #undef __FUNCT__ 568 #define __FUNCT__ "MatLUFactor_SeqAIJ" 569 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 570 { 571 PetscErrorCode ierr; 572 Mat C; 573 574 PetscFunctionBegin; 575 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 576 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 577 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 578 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 /* ----------------------------------------------------------- */ 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatSolve_SeqAIJ" 584 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 585 { 586 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 587 IS iscol = a->col,isrow = a->row; 588 PetscErrorCode ierr; 589 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 590 PetscInt nz,*rout,*cout; 591 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 592 593 PetscFunctionBegin; 594 if (!n) PetscFunctionReturn(0); 595 596 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 597 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 598 tmp = a->solve_work; 599 600 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 601 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 602 603 /* forward solve the lower triangular */ 604 tmp[0] = b[*r++]; 605 tmps = tmp; 606 for (i=1; i<n; i++) { 607 v = aa + ai[i] ; 608 vi = aj + ai[i] ; 609 nz = a->diag[i] - ai[i]; 610 sum = b[*r++]; 611 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 612 tmp[i] = sum; 613 } 614 615 /* backward solve the upper triangular */ 616 for (i=n-1; i>=0; i--){ 617 v = aa + a->diag[i] + 1; 618 vi = aj + a->diag[i] + 1; 619 nz = ai[i+1] - a->diag[i] - 1; 620 sum = tmp[i]; 621 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 622 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 623 } 624 625 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 626 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 627 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 628 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 629 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 630 PetscFunctionReturn(0); 631 } 632 633 /* ----------------------------------------------------------- */ 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 636 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 637 { 638 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 639 PetscErrorCode ierr; 640 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 641 PetscScalar *x,*b,*aa = a->a; 642 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 643 PetscInt adiag_i,i,*vi,nz,ai_i; 644 PetscScalar *v,sum; 645 #endif 646 647 PetscFunctionBegin; 648 if (!n) PetscFunctionReturn(0); 649 650 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 651 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 652 653 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 654 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 655 #else 656 /* forward solve the lower triangular */ 657 x[0] = b[0]; 658 for (i=1; i<n; i++) { 659 ai_i = ai[i]; 660 v = aa + ai_i; 661 vi = aj + ai_i; 662 nz = adiag[i] - ai_i; 663 sum = b[i]; 664 while (nz--) sum -= *v++ * x[*vi++]; 665 x[i] = sum; 666 } 667 668 /* backward solve the upper triangular */ 669 for (i=n-1; i>=0; i--){ 670 adiag_i = adiag[i]; 671 v = aa + adiag_i + 1; 672 vi = aj + adiag_i + 1; 673 nz = ai[i+1] - adiag_i - 1; 674 sum = x[i]; 675 while (nz--) sum -= *v++ * x[*vi++]; 676 x[i] = sum*aa[adiag_i]; 677 } 678 #endif 679 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 680 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 681 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 682 PetscFunctionReturn(0); 683 } 684 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 687 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 688 { 689 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 690 IS iscol = a->col,isrow = a->row; 691 PetscErrorCode ierr; 692 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 693 PetscInt nz,*rout,*cout; 694 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 695 696 PetscFunctionBegin; 697 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 698 699 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 700 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 701 tmp = a->solve_work; 702 703 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 704 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 705 706 /* forward solve the lower triangular */ 707 tmp[0] = b[*r++]; 708 for (i=1; i<n; i++) { 709 v = aa + ai[i] ; 710 vi = aj + ai[i] ; 711 nz = a->diag[i] - ai[i]; 712 sum = b[*r++]; 713 while (nz--) sum -= *v++ * tmp[*vi++ ]; 714 tmp[i] = sum; 715 } 716 717 /* backward solve the upper triangular */ 718 for (i=n-1; i>=0; i--){ 719 v = aa + a->diag[i] + 1; 720 vi = aj + a->diag[i] + 1; 721 nz = ai[i+1] - a->diag[i] - 1; 722 sum = tmp[i]; 723 while (nz--) sum -= *v++ * tmp[*vi++ ]; 724 tmp[i] = sum*aa[a->diag[i]]; 725 x[*c--] += tmp[i]; 726 } 727 728 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 729 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 730 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 731 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 732 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 733 734 PetscFunctionReturn(0); 735 } 736 /* -------------------------------------------------------------------*/ 737 #undef __FUNCT__ 738 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 739 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 740 { 741 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 742 IS iscol = a->col,isrow = a->row; 743 PetscErrorCode ierr; 744 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 745 PetscInt nz,*rout,*cout,*diag = a->diag; 746 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 747 748 PetscFunctionBegin; 749 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 750 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 751 tmp = a->solve_work; 752 753 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 754 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 755 756 /* copy the b into temp work space according to permutation */ 757 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 758 759 /* forward solve the U^T */ 760 for (i=0; i<n; i++) { 761 v = aa + diag[i] ; 762 vi = aj + diag[i] + 1; 763 nz = ai[i+1] - diag[i] - 1; 764 s1 = tmp[i]; 765 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 766 while (nz--) { 767 tmp[*vi++ ] -= (*v++)*s1; 768 } 769 tmp[i] = s1; 770 } 771 772 /* backward solve the L^T */ 773 for (i=n-1; i>=0; i--){ 774 v = aa + diag[i] - 1 ; 775 vi = aj + diag[i] - 1 ; 776 nz = diag[i] - ai[i]; 777 s1 = tmp[i]; 778 while (nz--) { 779 tmp[*vi-- ] -= (*v--)*s1; 780 } 781 } 782 783 /* copy tmp into x according to permutation */ 784 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 785 786 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 787 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 788 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 789 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 790 791 ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr); 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 797 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 798 { 799 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 800 IS iscol = a->col,isrow = a->row; 801 PetscErrorCode ierr; 802 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 803 PetscInt nz,*rout,*cout,*diag = a->diag; 804 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 805 806 PetscFunctionBegin; 807 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 808 809 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 810 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 811 tmp = a->solve_work; 812 813 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 814 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 815 816 /* copy the b into temp work space according to permutation */ 817 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 818 819 /* forward solve the U^T */ 820 for (i=0; i<n; i++) { 821 v = aa + diag[i] ; 822 vi = aj + diag[i] + 1; 823 nz = ai[i+1] - diag[i] - 1; 824 tmp[i] *= *v++; 825 while (nz--) { 826 tmp[*vi++ ] -= (*v++)*tmp[i]; 827 } 828 } 829 830 /* backward solve the L^T */ 831 for (i=n-1; i>=0; i--){ 832 v = aa + diag[i] - 1 ; 833 vi = aj + diag[i] - 1 ; 834 nz = diag[i] - ai[i]; 835 while (nz--) { 836 tmp[*vi-- ] -= (*v--)*tmp[i]; 837 } 838 } 839 840 /* copy tmp into x according to permutation */ 841 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 842 843 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 844 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 845 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 846 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 847 848 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 849 PetscFunctionReturn(0); 850 } 851 /* ----------------------------------------------------------------*/ 852 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 856 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 857 { 858 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 859 IS isicol; 860 PetscErrorCode ierr; 861 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 862 PetscInt *bi,*cols,nnz,*cols_lvl; 863 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 864 PetscInt i,levels,diagonal_fill; 865 PetscTruth col_identity,row_identity; 866 PetscReal f; 867 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 868 PetscBT lnkbt; 869 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 870 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 871 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 872 873 PetscFunctionBegin; 874 f = info->fill; 875 levels = (PetscInt)info->levels; 876 diagonal_fill = (PetscInt)info->diagonal_fill; 877 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 878 879 /* special case that simply copies fill pattern */ 880 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 881 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 882 if (!levels && row_identity && col_identity) { 883 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 884 (*fact)->factor = FACTOR_LU; 885 b = (Mat_SeqAIJ*)(*fact)->data; 886 if (!b->diag) { 887 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 888 } 889 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 890 b->row = isrow; 891 b->col = iscol; 892 b->icol = isicol; 893 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 894 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 895 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 896 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 897 PetscFunctionReturn(0); 898 } 899 900 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 901 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 902 903 /* get new row pointers */ 904 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 905 bi[0] = 0; 906 /* bdiag is location of diagonal in factor */ 907 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 908 bdiag[0] = 0; 909 910 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 911 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 912 913 /* create a linked list for storing column indices of the active row */ 914 nlnk = n + 1; 915 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 916 917 /* initial FreeSpace size is f*(ai[n]+1) */ 918 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 919 current_space = free_space; 920 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 921 current_space_lvl = free_space_lvl; 922 923 for (i=0; i<n; i++) { 924 nzi = 0; 925 /* copy current row into linked list */ 926 nnz = ai[r[i]+1] - ai[r[i]]; 927 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 928 cols = aj + ai[r[i]]; 929 lnk[i] = -1; /* marker to indicate if diagonal exists */ 930 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 931 nzi += nlnk; 932 933 /* make sure diagonal entry is included */ 934 if (diagonal_fill && lnk[i] == -1) { 935 fm = n; 936 while (lnk[fm] < i) fm = lnk[fm]; 937 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 938 lnk[fm] = i; 939 lnk_lvl[i] = 0; 940 nzi++; dcount++; 941 } 942 943 /* add pivot rows into the active row */ 944 nzbd = 0; 945 prow = lnk[n]; 946 while (prow < i) { 947 nnz = bdiag[prow]; 948 cols = bj_ptr[prow] + nnz + 1; 949 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 950 nnz = bi[prow+1] - bi[prow] - nnz - 1; 951 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 952 nzi += nlnk; 953 prow = lnk[prow]; 954 nzbd++; 955 } 956 bdiag[i] = nzbd; 957 bi[i+1] = bi[i] + nzi; 958 959 /* if free space is not available, make more free space */ 960 if (current_space->local_remaining<nzi) { 961 nnz = nzi*(n - i); /* estimated and max additional space needed */ 962 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 963 ierr = GetMoreSpace(nnz,¤t_space_lvl);CHKERRQ(ierr); 964 reallocs++; 965 } 966 967 /* copy data into free_space and free_space_lvl, then initialize lnk */ 968 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 969 bj_ptr[i] = current_space->array; 970 bjlvl_ptr[i] = current_space_lvl->array; 971 972 /* make sure the active row i has diagonal entry */ 973 if (*(bj_ptr[i]+bdiag[i]) != i) { 974 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 975 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 976 } 977 978 current_space->array += nzi; 979 current_space->local_used += nzi; 980 current_space->local_remaining -= nzi; 981 current_space_lvl->array += nzi; 982 current_space_lvl->local_used += nzi; 983 current_space_lvl->local_remaining -= nzi; 984 } 985 986 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 987 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 988 989 /* destroy list of free space and other temporary arrays */ 990 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 991 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 992 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 993 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 994 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 995 996 #if defined(PETSC_USE_DEBUG) 997 { 998 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 999 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 1000 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 1001 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr); 1002 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 1003 if (diagonal_fill) { 1004 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr); 1005 } 1006 } 1007 #endif 1008 1009 /* put together the new matrix */ 1010 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1011 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1012 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1013 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1014 b = (Mat_SeqAIJ*)(*fact)->data; 1015 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1016 b->singlemalloc = PETSC_FALSE; 1017 len = (bi[n] )*sizeof(PetscScalar); 1018 /* the next line frees the default space generated by the Create() */ 1019 ierr = PetscFree(b->a);CHKERRQ(ierr); 1020 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1021 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1022 b->j = bj; 1023 b->i = bi; 1024 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1025 b->diag = bdiag; 1026 b->ilen = 0; 1027 b->imax = 0; 1028 b->row = isrow; 1029 b->col = iscol; 1030 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1031 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1032 b->icol = isicol; 1033 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1034 /* In b structure: Free imax, ilen, old a, old j. 1035 Allocate bdiag, solve_work, new a, new j */ 1036 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1037 b->maxnz = b->nz = bi[n] ; 1038 (*fact)->factor = FACTOR_LU; 1039 (*fact)->info.factor_mallocs = reallocs; 1040 (*fact)->info.fill_ratio_given = f; 1041 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1042 1043 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1044 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1045 1046 PetscFunctionReturn(0); 1047 } 1048 1049 #include "src/mat/impls/sbaij/seq/sbaij.h" 1050 #undef __FUNCT__ 1051 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1052 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1053 { 1054 Mat C = *B; 1055 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1056 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1057 IS ip=b->row; 1058 PetscErrorCode ierr; 1059 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1060 PetscInt *ai=a->i,*aj=a->j; 1061 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1062 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1063 PetscReal zeropivot,rs,shiftnz; 1064 PetscTruth shiftpd; 1065 ChShift_Ctx sctx; 1066 PetscInt newshift; 1067 1068 PetscFunctionBegin; 1069 shiftnz = info->shiftnz; 1070 shiftpd = info->shiftpd; 1071 zeropivot = info->zeropivot; 1072 1073 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1074 1075 /* initialization */ 1076 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1077 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1078 jl = il + mbs; 1079 rtmp = (MatScalar*)(jl + mbs); 1080 1081 sctx.shift_amount = 0; 1082 sctx.nshift = 0; 1083 do { 1084 sctx.chshift = PETSC_FALSE; 1085 for (i=0; i<mbs; i++) { 1086 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1087 } 1088 1089 for (k = 0; k<mbs; k++){ 1090 bval = ba + bi[k]; 1091 /* initialize k-th row by the perm[k]-th row of A */ 1092 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1093 for (j = jmin; j < jmax; j++){ 1094 col = rip[aj[j]]; 1095 if (col >= k){ /* only take upper triangular entry */ 1096 rtmp[col] = aa[j]; 1097 *bval++ = 0.0; /* for in-place factorization */ 1098 } 1099 } 1100 /* shift the diagonal of the matrix */ 1101 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1102 1103 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1104 dk = rtmp[k]; 1105 i = jl[k]; /* first row to be added to k_th row */ 1106 1107 while (i < k){ 1108 nexti = jl[i]; /* next row to be added to k_th row */ 1109 1110 /* compute multiplier, update diag(k) and U(i,k) */ 1111 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1112 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1113 dk += uikdi*ba[ili]; 1114 ba[ili] = uikdi; /* -U(i,k) */ 1115 1116 /* add multiple of row i to k-th row */ 1117 jmin = ili + 1; jmax = bi[i+1]; 1118 if (jmin < jmax){ 1119 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1120 /* update il and jl for row i */ 1121 il[i] = jmin; 1122 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1123 } 1124 i = nexti; 1125 } 1126 1127 /* shift the diagonals when zero pivot is detected */ 1128 /* compute rs=sum of abs(off-diagonal) */ 1129 rs = 0.0; 1130 jmin = bi[k]+1; 1131 nz = bi[k+1] - jmin; 1132 if (nz){ 1133 bcol = bj + jmin; 1134 while (nz--){ 1135 rs += PetscAbsScalar(rtmp[*bcol]); 1136 bcol++; 1137 } 1138 } 1139 1140 sctx.rs = rs; 1141 sctx.pv = dk; 1142 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1143 if (newshift == 1){ 1144 break; /* sctx.shift_amount is updated */ 1145 } else if (newshift == -1){ 1146 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1147 } 1148 1149 /* copy data into U(k,:) */ 1150 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1151 jmin = bi[k]+1; jmax = bi[k+1]; 1152 if (jmin < jmax) { 1153 for (j=jmin; j<jmax; j++){ 1154 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1155 } 1156 /* add the k-th row into il and jl */ 1157 il[k] = jmin; 1158 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1159 } 1160 } 1161 } while (sctx.chshift); 1162 ierr = PetscFree(il);CHKERRQ(ierr); 1163 1164 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1165 C->factor = FACTOR_CHOLESKY; 1166 C->assembled = PETSC_TRUE; 1167 C->preallocated = PETSC_TRUE; 1168 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1169 if (sctx.nshift){ 1170 if (shiftnz) { 1171 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1172 } else if (shiftpd) { 1173 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1174 } 1175 } 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1181 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1182 { 1183 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1184 Mat_SeqSBAIJ *b; 1185 Mat B; 1186 PetscErrorCode ierr; 1187 PetscTruth perm_identity; 1188 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1189 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1190 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1191 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1192 PetscReal fill=info->fill,levels=info->levels; 1193 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1194 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1195 PetscBT lnkbt; 1196 1197 PetscFunctionBegin; 1198 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1199 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1200 1201 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1202 ui[0] = 0; 1203 1204 /* special case that simply copies fill pattern */ 1205 if (!levels && perm_identity) { 1206 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1207 for (i=0; i<am; i++) { 1208 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1209 } 1210 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1211 cols = uj; 1212 for (i=0; i<am; i++) { 1213 aj = a->j + a->diag[i]; 1214 ncols = ui[i+1] - ui[i]; 1215 for (j=0; j<ncols; j++) *cols++ = *aj++; 1216 } 1217 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1218 /* initialization */ 1219 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1220 1221 /* jl: linked list for storing indices of the pivot rows 1222 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1223 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1224 il = jl + am; 1225 uj_ptr = (PetscInt**)(il + am); 1226 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1227 for (i=0; i<am; i++){ 1228 jl[i] = am; il[i] = 0; 1229 } 1230 1231 /* create and initialize a linked list for storing column indices of the active row k */ 1232 nlnk = am + 1; 1233 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1234 1235 /* initial FreeSpace size is fill*(ai[am]+1) */ 1236 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1237 current_space = free_space; 1238 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1239 current_space_lvl = free_space_lvl; 1240 1241 for (k=0; k<am; k++){ /* for each active row k */ 1242 /* initialize lnk by the column indices of row rip[k] of A */ 1243 nzk = 0; 1244 ncols = ai[rip[k]+1] - ai[rip[k]]; 1245 ncols_upper = 0; 1246 for (j=0; j<ncols; j++){ 1247 i = *(aj + ai[rip[k]] + j); 1248 if (rip[i] >= k){ /* only take upper triangular entry */ 1249 ajtmp[ncols_upper] = i; 1250 ncols_upper++; 1251 } 1252 } 1253 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1254 nzk += nlnk; 1255 1256 /* update lnk by computing fill-in for each pivot row to be merged in */ 1257 prow = jl[k]; /* 1st pivot row */ 1258 1259 while (prow < k){ 1260 nextprow = jl[prow]; 1261 1262 /* merge prow into k-th row */ 1263 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1264 jmax = ui[prow+1]; 1265 ncols = jmax-jmin; 1266 i = jmin - ui[prow]; 1267 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1268 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1269 j = *(uj - 1); 1270 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1271 nzk += nlnk; 1272 1273 /* update il and jl for prow */ 1274 if (jmin < jmax){ 1275 il[prow] = jmin; 1276 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1277 } 1278 prow = nextprow; 1279 } 1280 1281 /* if free space is not available, make more free space */ 1282 if (current_space->local_remaining<nzk) { 1283 i = am - k + 1; /* num of unfactored rows */ 1284 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1285 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1286 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1287 reallocs++; 1288 } 1289 1290 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1291 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1292 1293 /* add the k-th row into il and jl */ 1294 if (nzk > 1){ 1295 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1296 jl[k] = jl[i]; jl[i] = k; 1297 il[k] = ui[k] + 1; 1298 } 1299 uj_ptr[k] = current_space->array; 1300 uj_lvl_ptr[k] = current_space_lvl->array; 1301 1302 current_space->array += nzk; 1303 current_space->local_used += nzk; 1304 current_space->local_remaining -= nzk; 1305 1306 current_space_lvl->array += nzk; 1307 current_space_lvl->local_used += nzk; 1308 current_space_lvl->local_remaining -= nzk; 1309 1310 ui[k+1] = ui[k] + nzk; 1311 } 1312 1313 #if defined(PETSC_USE_DEBUG) 1314 if (ai[am] != 0) { 1315 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1316 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1317 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1318 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1319 } else { 1320 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1321 } 1322 #endif 1323 1324 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1325 ierr = PetscFree(jl);CHKERRQ(ierr); 1326 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1327 1328 /* destroy list of free space and other temporary array(s) */ 1329 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1330 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1331 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1332 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1333 1334 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1335 1336 /* put together the new matrix in MATSEQSBAIJ format */ 1337 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1338 B = *fact; 1339 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1340 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1341 1342 b = (Mat_SeqSBAIJ*)B->data; 1343 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1344 b->singlemalloc = PETSC_FALSE; 1345 /* the next line frees the default space generated by the Create() */ 1346 ierr = PetscFree(b->a);CHKERRQ(ierr); 1347 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1348 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1349 b->j = uj; 1350 b->i = ui; 1351 b->diag = 0; 1352 b->ilen = 0; 1353 b->imax = 0; 1354 b->row = perm; 1355 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1356 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1357 b->icol = perm; 1358 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1359 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1360 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1361 b->maxnz = b->nz = ui[am]; 1362 1363 B->factor = FACTOR_CHOLESKY; 1364 B->info.factor_mallocs = reallocs; 1365 B->info.fill_ratio_given = fill; 1366 if (ai[am] != 0) { 1367 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1368 } else { 1369 B->info.fill_ratio_needed = 0.0; 1370 } 1371 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1372 if (perm_identity){ 1373 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1374 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1375 } 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1381 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1382 { 1383 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1384 Mat_SeqSBAIJ *b; 1385 Mat B; 1386 PetscErrorCode ierr; 1387 PetscTruth perm_identity; 1388 PetscReal fill = info->fill; 1389 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1390 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1391 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1392 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1393 PetscBT lnkbt; 1394 IS iperm; 1395 1396 PetscFunctionBegin; 1397 /* check whether perm is the identity mapping */ 1398 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1399 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1400 1401 if (!perm_identity){ 1402 /* check if perm is symmetric! */ 1403 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1404 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1405 for (i=0; i<am; i++) { 1406 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1407 } 1408 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1409 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1410 } 1411 1412 /* initialization */ 1413 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1414 ui[0] = 0; 1415 1416 /* jl: linked list for storing indices of the pivot rows 1417 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1418 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1419 il = jl + am; 1420 cols = il + am; 1421 ui_ptr = (PetscInt**)(cols + am); 1422 for (i=0; i<am; i++){ 1423 jl[i] = am; il[i] = 0; 1424 } 1425 1426 /* create and initialize a linked list for storing column indices of the active row k */ 1427 nlnk = am + 1; 1428 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1429 1430 /* initial FreeSpace size is fill*(ai[am]+1) */ 1431 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1432 current_space = free_space; 1433 1434 for (k=0; k<am; k++){ /* for each active row k */ 1435 /* initialize lnk by the column indices of row rip[k] of A */ 1436 nzk = 0; 1437 ncols = ai[rip[k]+1] - ai[rip[k]]; 1438 ncols_upper = 0; 1439 for (j=0; j<ncols; j++){ 1440 i = rip[*(aj + ai[rip[k]] + j)]; 1441 if (i >= k){ /* only take upper triangular entry */ 1442 cols[ncols_upper] = i; 1443 ncols_upper++; 1444 } 1445 } 1446 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1447 nzk += nlnk; 1448 1449 /* update lnk by computing fill-in for each pivot row to be merged in */ 1450 prow = jl[k]; /* 1st pivot row */ 1451 1452 while (prow < k){ 1453 nextprow = jl[prow]; 1454 /* merge prow into k-th row */ 1455 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1456 jmax = ui[prow+1]; 1457 ncols = jmax-jmin; 1458 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1459 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1460 nzk += nlnk; 1461 1462 /* update il and jl for prow */ 1463 if (jmin < jmax){ 1464 il[prow] = jmin; 1465 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1466 } 1467 prow = nextprow; 1468 } 1469 1470 /* if free space is not available, make more free space */ 1471 if (current_space->local_remaining<nzk) { 1472 i = am - k + 1; /* num of unfactored rows */ 1473 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1474 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1475 reallocs++; 1476 } 1477 1478 /* copy data into free space, then initialize lnk */ 1479 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1480 1481 /* add the k-th row into il and jl */ 1482 if (nzk-1 > 0){ 1483 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1484 jl[k] = jl[i]; jl[i] = k; 1485 il[k] = ui[k] + 1; 1486 } 1487 ui_ptr[k] = current_space->array; 1488 current_space->array += nzk; 1489 current_space->local_used += nzk; 1490 current_space->local_remaining -= nzk; 1491 1492 ui[k+1] = ui[k] + nzk; 1493 } 1494 1495 #if defined(PETSC_USE_DEBUG) 1496 if (ai[am] != 0) { 1497 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1498 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1499 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1500 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1501 } else { 1502 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1503 } 1504 #endif 1505 1506 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1507 ierr = PetscFree(jl);CHKERRQ(ierr); 1508 1509 /* destroy list of free space and other temporary array(s) */ 1510 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1511 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1512 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1513 1514 /* put together the new matrix in MATSEQSBAIJ format */ 1515 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1516 B = *fact; 1517 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1518 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1519 1520 b = (Mat_SeqSBAIJ*)B->data; 1521 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1522 b->singlemalloc = PETSC_FALSE; 1523 /* the next line frees the default space generated by the Create() */ 1524 ierr = PetscFree(b->a);CHKERRQ(ierr); 1525 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1526 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1527 b->j = uj; 1528 b->i = ui; 1529 b->diag = 0; 1530 b->ilen = 0; 1531 b->imax = 0; 1532 b->row = perm; 1533 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1534 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1535 b->icol = perm; 1536 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1537 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1538 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1539 b->maxnz = b->nz = ui[am]; 1540 1541 B->factor = FACTOR_CHOLESKY; 1542 B->info.factor_mallocs = reallocs; 1543 B->info.fill_ratio_given = fill; 1544 if (ai[am] != 0) { 1545 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1546 } else { 1547 B->info.fill_ratio_needed = 0.0; 1548 } 1549 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1550 if (perm_identity){ 1551 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1552 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1553 } 1554 PetscFunctionReturn(0); 1555 } 1556