1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 23 24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 28 #endif 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 32 /* ------------------------------------------------------------ 33 34 This interface was contribed by Tony Caola 35 36 This routine is an interface to the pivoting drop-tolerance 37 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 38 SPARSEKIT2. 39 40 The SPARSEKIT2 routines used here are covered by the GNU 41 copyright; see the file gnu in this directory. 42 43 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 44 help in getting this routine ironed out. 45 46 The major drawback to this routine is that if info->fill is 47 not large enough it fails rather than allocating more space; 48 this can be fixed by hacking/improving the f2c version of 49 Yousef Saad's code. 50 51 ------------------------------------------------------------ 52 */ 53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 54 { 55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 56 PetscFunctionBegin; 57 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 58 You can obtain the drop tolerance routines by installing PETSc from\n\ 59 www.mcs.anl.gov/petsc\n"); 60 #else 61 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 62 IS iscolf,isicol,isirow; 63 PetscTruth reorder; 64 PetscErrorCode ierr,sierr; 65 PetscInt *c,*r,*ic,i,n = A->m; 66 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 67 PetscInt *ordcol,*iwk,*iperm,*jw; 68 PetscInt jmax,lfill,job,*o_i,*o_j; 69 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 70 PetscReal af; 71 72 PetscFunctionBegin; 73 74 if (info->dt == PETSC_DEFAULT) info->dt = .005; 75 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 76 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 77 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 78 lfill = (PetscInt)(info->dtcount/2.0); 79 jmax = (PetscInt)(info->fill*a->nz); 80 81 82 /* ------------------------------------------------------------ 83 If reorder=.TRUE., then the original matrix has to be 84 reordered to reflect the user selected ordering scheme, and 85 then de-reordered so it is in it's original format. 86 Because Saad's dperm() is NOT in place, we have to copy 87 the original matrix and allocate more storage. . . 88 ------------------------------------------------------------ 89 */ 90 91 /* set reorder to true if either isrow or iscol is not identity */ 92 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 93 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 94 reorder = PetscNot(reorder); 95 96 97 /* storage for ilu factor */ 98 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 100 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 101 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 102 103 /* ------------------------------------------------------------ 104 Make sure that everything is Fortran formatted (1-Based) 105 ------------------------------------------------------------ 106 */ 107 for (i=old_i[0];i<old_i[n];i++) { 108 old_j[i]++; 109 } 110 for(i=0;i<n+1;i++) { 111 old_i[i]++; 112 }; 113 114 115 if (reorder) { 116 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 117 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 r[i] = r[i]+1; 120 c[i] = c[i]+1; 121 } 122 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 123 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 124 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 125 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 126 for (i=0;i<n;i++) { 127 r[i] = r[i]-1; 128 c[i] = c[i]-1; 129 } 130 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 131 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 132 o_a = old_a2; 133 o_j = old_j2; 134 o_i = old_i2; 135 } else { 136 o_a = old_a; 137 o_j = old_j; 138 o_i = old_i; 139 } 140 141 /* ------------------------------------------------------------ 142 Call Saad's ilutp() routine to generate the factorization 143 ------------------------------------------------------------ 144 */ 145 146 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 147 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 148 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 149 150 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 151 if (sierr) { 152 switch (sierr) { 153 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 154 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 155 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 156 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 157 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 158 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 159 } 160 } 161 162 ierr = PetscFree(w);CHKERRQ(ierr); 163 ierr = PetscFree(jw);CHKERRQ(ierr); 164 165 /* ------------------------------------------------------------ 166 Saad's routine gives the result in Modified Sparse Row (msr) 167 Convert to Compressed Sparse Row format (csr) 168 ------------------------------------------------------------ 169 */ 170 171 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 172 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 173 174 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 175 176 ierr = PetscFree(iwk);CHKERRQ(ierr); 177 ierr = PetscFree(wk);CHKERRQ(ierr); 178 179 if (reorder) { 180 ierr = PetscFree(old_a2);CHKERRQ(ierr); 181 ierr = PetscFree(old_j2);CHKERRQ(ierr); 182 ierr = PetscFree(old_i2);CHKERRQ(ierr); 183 } else { 184 /* fix permutation of old_j that the factorization introduced */ 185 for (i=old_i[0]; i<old_i[n]; i++) { 186 old_j[i-1] = iperm[old_j[i-1]-1]; 187 } 188 } 189 190 /* get rid of the shift to indices starting at 1 */ 191 for (i=0; i<n+1; i++) { 192 old_i[i]--; 193 } 194 for (i=old_i[0];i<old_i[n];i++) { 195 old_j[i]--; 196 } 197 198 /* Make the factored matrix 0-based */ 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 206 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 207 /*-- permute the right-hand-side and solution vectors --*/ 208 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 209 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 210 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 211 for(i=0; i<n; i++) { 212 ordcol[i] = ic[iperm[i]-1]; 213 }; 214 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 215 ierr = ISDestroy(isicol);CHKERRQ(ierr); 216 217 ierr = PetscFree(iperm);CHKERRQ(ierr); 218 219 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 220 ierr = PetscFree(ordcol);CHKERRQ(ierr); 221 222 /*----- put together the new matrix -----*/ 223 224 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 225 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 226 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 227 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 228 (*fact)->factor = FACTOR_LU; 229 (*fact)->assembled = PETSC_TRUE; 230 231 b = (Mat_SeqAIJ*)(*fact)->data; 232 b->freedata = PETSC_TRUE; 233 b->sorted = PETSC_FALSE; 234 b->singlemalloc = PETSC_FALSE; 235 b->a = new_a; 236 b->j = new_j; 237 b->i = new_i; 238 b->ilen = 0; 239 b->imax = 0; 240 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 241 b->row = isirow; 242 b->col = iscolf; 243 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 244 b->maxnz = b->nz = new_i[n]; 245 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 246 (*fact)->info.factor_mallocs = 0; 247 248 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 249 250 af = ((double)b->nz)/((double)a->nz) + .001; 251 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr); 252 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 253 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 254 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 255 256 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 257 258 PetscFunctionReturn(0); 259 #endif 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 265 { 266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 267 IS isicol; 268 PetscErrorCode ierr; 269 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 270 PetscInt *bi,*bj,*ajtmp; 271 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 272 PetscReal f; 273 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 274 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 275 PetscBT lnkbt; 276 277 PetscFunctionBegin; 278 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 279 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 280 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 281 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 282 283 /* get new row pointers */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 285 bi[0] = 0; 286 287 /* bdiag is location of diagonal in factor */ 288 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 289 bdiag[0] = 0; 290 291 /* linked list for storing column indices of the active row */ 292 nlnk = n + 1; 293 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 294 295 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 296 im = cols + n; 297 bi_ptr = (PetscInt**)(im + n); 298 299 /* initial FreeSpace size is f*(ai[n]+1) */ 300 f = info->fill; 301 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 302 current_space = free_space; 303 304 for (i=0; i<n; i++) { 305 /* copy previous fill into linked list */ 306 nzi = 0; 307 nnz = ai[r[i]+1] - ai[r[i]]; 308 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 309 ajtmp = aj + ai[r[i]]; 310 for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */ 311 ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 312 nzi += nlnk; 313 314 /* add pivot rows into linked list */ 315 row = lnk[n]; 316 while (row < i) { 317 nzbd = bdiag[row] - bi[row] + 1; 318 ajtmp = bi_ptr[row] + nzbd; 319 nnz = im[row] - nzbd; /* num of columns with row<indices<=i ? */ 320 ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 321 nzi += nlnk; 322 row = lnk[row]; 323 } 324 325 bi[i+1] = bi[i] + nzi; 326 im[i] = nzi; 327 328 /* mark bdiag */ 329 nzbd = 0; 330 nnz = nzi; 331 k = lnk[n]; 332 while (nnz-- && k < i){ 333 nzbd++; 334 k = lnk[k]; 335 } 336 bdiag[i] = bi[i] + nzbd; 337 338 /* if free space is not available, make more free space */ 339 if (current_space->local_remaining<nzi) { 340 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 341 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 342 reallocs++; 343 } 344 345 /* copy data into free space, then initialize lnk */ 346 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 347 bi_ptr[i] = current_space->array; 348 current_space->array += nzi; 349 current_space->local_used += nzi; 350 current_space->local_remaining -= nzi; 351 } 352 #if defined(PETSC_USE_DEBUG) 353 if (ai[n] != 0) { 354 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 355 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 356 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %G or use \n",af));CHKERRQ(ierr); 357 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 358 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 359 } else { 360 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr); 361 } 362 #endif 363 364 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 365 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 366 367 /* destroy list of free space and other temporary array(s) */ 368 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 369 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 370 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 371 ierr = PetscFree(cols);CHKERRQ(ierr); 372 373 /* put together the new matrix */ 374 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 375 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 376 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 377 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 378 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 379 b = (Mat_SeqAIJ*)(*B)->data; 380 b->freedata = PETSC_TRUE; 381 b->singlemalloc = PETSC_FALSE; 382 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 383 b->j = bj; 384 b->i = bi; 385 b->diag = bdiag; 386 b->ilen = 0; 387 b->imax = 0; 388 b->row = isrow; 389 b->col = iscol; 390 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 391 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 392 b->icol = isicol; 393 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 394 395 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 396 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 397 b->maxnz = b->nz = bi[n] ; 398 399 (*B)->factor = FACTOR_LU; 400 (*B)->info.factor_mallocs = reallocs; 401 (*B)->info.fill_ratio_given = f; 402 403 if (ai[n] != 0) { 404 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 405 } else { 406 (*B)->info.fill_ratio_needed = 0.0; 407 } 408 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 409 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 410 PetscFunctionReturn(0); 411 } 412 413 /* ----------------------------------------------------------- */ 414 #undef __FUNCT__ 415 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 416 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 417 { 418 Mat C=*B; 419 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 420 IS isrow = b->row,isicol = b->icol; 421 PetscErrorCode ierr; 422 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 423 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 424 PetscInt *diag_offset = b->diag,diag,*pj; 425 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 426 PetscScalar d; 427 PetscReal rs; 428 LUShift_Ctx sctx; 429 PetscInt newshift; 430 431 PetscFunctionBegin; 432 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 433 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 434 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 435 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 436 rtmps = rtmp; ics = ic; 437 438 if (!a->diag) { 439 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 440 } 441 /* if both shift schemes are chosen by user, only use info->shiftpd */ 442 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 443 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 444 PetscInt *aai = a->i,*ddiag = a->diag; 445 sctx.shift_top = 0; 446 for (i=0; i<n; i++) { 447 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 448 d = (a->a)[ddiag[i]]; 449 rs = -PetscAbsScalar(d) - PetscRealPart(d); 450 v = a->a+aai[i]; 451 nz = aai[i+1] - aai[i]; 452 for (j=0; j<nz; j++) 453 rs += PetscAbsScalar(v[j]); 454 if (rs>sctx.shift_top) sctx.shift_top = rs; 455 } 456 if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12; 457 sctx.shift_top *= 1.1; 458 sctx.nshift_max = 5; 459 sctx.shift_lo = 0.; 460 sctx.shift_hi = 1.; 461 } 462 463 sctx.shift_amount = 0; 464 sctx.nshift = 0; 465 do { 466 sctx.lushift = PETSC_FALSE; 467 for (i=0; i<n; i++){ 468 nz = bi[i+1] - bi[i]; 469 bjtmp = bj + bi[i]; 470 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 471 472 /* load in initial (unfactored row) */ 473 nz = a->i[r[i]+1] - a->i[r[i]]; 474 ajtmp = a->j + a->i[r[i]]; 475 v = a->a + a->i[r[i]]; 476 for (j=0; j<nz; j++) { 477 rtmp[ics[ajtmp[j]]] = v[j]; 478 } 479 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 480 481 row = *bjtmp++; 482 while (row < i) { 483 pc = rtmp + row; 484 if (*pc != 0.0) { 485 pv = b->a + diag_offset[row]; 486 pj = b->j + diag_offset[row] + 1; 487 multiplier = *pc / *pv++; 488 *pc = multiplier; 489 nz = bi[row+1] - diag_offset[row] - 1; 490 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 491 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 492 } 493 row = *bjtmp++; 494 } 495 /* finished row so stick it into b->a */ 496 pv = b->a + bi[i] ; 497 pj = b->j + bi[i] ; 498 nz = bi[i+1] - bi[i]; 499 diag = diag_offset[i] - bi[i]; 500 rs = 0.0; 501 for (j=0; j<nz; j++) { 502 pv[j] = rtmps[pj[j]]; 503 if (j != diag) rs += PetscAbsScalar(pv[j]); 504 } 505 506 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 507 sctx.rs = rs; 508 sctx.pv = pv[diag]; 509 ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 510 if (newshift == 1){ 511 break; /* sctx.shift_amount is updated */ 512 } else if (newshift == -1){ 513 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 514 } 515 } 516 517 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 518 /* 519 * if no shift in this attempt & shifting & started shifting & can refine, 520 * then try lower shift 521 */ 522 sctx.shift_hi = info->shift_fraction; 523 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 524 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 525 sctx.lushift = PETSC_TRUE; 526 sctx.nshift++; 527 } 528 } while (sctx.lushift); 529 530 /* invert diagonal entries for simplier triangular solves */ 531 for (i=0; i<n; i++) { 532 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 533 } 534 535 ierr = PetscFree(rtmp);CHKERRQ(ierr); 536 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 537 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 538 C->factor = FACTOR_LU; 539 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 540 C->assembled = PETSC_TRUE; 541 ierr = PetscLogFlops(C->n);CHKERRQ(ierr); 542 if (sctx.nshift){ 543 if (info->shiftnz) { 544 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 545 } else if (info->shiftpd) { 546 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr); 547 } 548 } 549 PetscFunctionReturn(0); 550 } 551 552 #undef __FUNCT__ 553 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 554 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 555 { 556 PetscFunctionBegin; 557 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 558 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 559 PetscFunctionReturn(0); 560 } 561 562 563 /* ----------------------------------------------------------- */ 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatLUFactor_SeqAIJ" 566 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 567 { 568 PetscErrorCode ierr; 569 Mat C; 570 571 PetscFunctionBegin; 572 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 573 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 574 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 575 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 576 PetscFunctionReturn(0); 577 } 578 /* ----------------------------------------------------------- */ 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatSolve_SeqAIJ" 581 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 582 { 583 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 584 IS iscol = a->col,isrow = a->row; 585 PetscErrorCode ierr; 586 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 587 PetscInt nz,*rout,*cout; 588 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 589 590 PetscFunctionBegin; 591 if (!n) PetscFunctionReturn(0); 592 593 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 594 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 595 tmp = a->solve_work; 596 597 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 598 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 599 600 /* forward solve the lower triangular */ 601 tmp[0] = b[*r++]; 602 tmps = tmp; 603 for (i=1; i<n; i++) { 604 v = aa + ai[i] ; 605 vi = aj + ai[i] ; 606 nz = a->diag[i] - ai[i]; 607 sum = b[*r++]; 608 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 609 tmp[i] = sum; 610 } 611 612 /* backward solve the upper triangular */ 613 for (i=n-1; i>=0; i--){ 614 v = aa + a->diag[i] + 1; 615 vi = aj + a->diag[i] + 1; 616 nz = ai[i+1] - a->diag[i] - 1; 617 sum = tmp[i]; 618 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 619 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 620 } 621 622 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 623 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 624 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 625 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 626 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 627 PetscFunctionReturn(0); 628 } 629 630 /* ----------------------------------------------------------- */ 631 #undef __FUNCT__ 632 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 633 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 634 { 635 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 636 PetscErrorCode ierr; 637 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 638 PetscScalar *x,*b,*aa = a->a; 639 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 640 PetscInt adiag_i,i,*vi,nz,ai_i; 641 PetscScalar *v,sum; 642 #endif 643 644 PetscFunctionBegin; 645 if (!n) PetscFunctionReturn(0); 646 647 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 648 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 649 650 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 651 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 652 #else 653 /* forward solve the lower triangular */ 654 x[0] = b[0]; 655 for (i=1; i<n; i++) { 656 ai_i = ai[i]; 657 v = aa + ai_i; 658 vi = aj + ai_i; 659 nz = adiag[i] - ai_i; 660 sum = b[i]; 661 while (nz--) sum -= *v++ * x[*vi++]; 662 x[i] = sum; 663 } 664 665 /* backward solve the upper triangular */ 666 for (i=n-1; i>=0; i--){ 667 adiag_i = adiag[i]; 668 v = aa + adiag_i + 1; 669 vi = aj + adiag_i + 1; 670 nz = ai[i+1] - adiag_i - 1; 671 sum = x[i]; 672 while (nz--) sum -= *v++ * x[*vi++]; 673 x[i] = sum*aa[adiag_i]; 674 } 675 #endif 676 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 677 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 678 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 679 PetscFunctionReturn(0); 680 } 681 682 #undef __FUNCT__ 683 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 684 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 685 { 686 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 687 IS iscol = a->col,isrow = a->row; 688 PetscErrorCode ierr; 689 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 690 PetscInt nz,*rout,*cout; 691 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 692 693 PetscFunctionBegin; 694 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 695 696 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 697 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 698 tmp = a->solve_work; 699 700 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 701 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 702 703 /* forward solve the lower triangular */ 704 tmp[0] = b[*r++]; 705 for (i=1; i<n; i++) { 706 v = aa + ai[i] ; 707 vi = aj + ai[i] ; 708 nz = a->diag[i] - ai[i]; 709 sum = b[*r++]; 710 while (nz--) sum -= *v++ * tmp[*vi++ ]; 711 tmp[i] = sum; 712 } 713 714 /* backward solve the upper triangular */ 715 for (i=n-1; i>=0; i--){ 716 v = aa + a->diag[i] + 1; 717 vi = aj + a->diag[i] + 1; 718 nz = ai[i+1] - a->diag[i] - 1; 719 sum = tmp[i]; 720 while (nz--) sum -= *v++ * tmp[*vi++ ]; 721 tmp[i] = sum*aa[a->diag[i]]; 722 x[*c--] += tmp[i]; 723 } 724 725 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 726 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 727 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 728 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 729 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 730 731 PetscFunctionReturn(0); 732 } 733 /* -------------------------------------------------------------------*/ 734 #undef __FUNCT__ 735 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 736 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 737 { 738 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 739 IS iscol = a->col,isrow = a->row; 740 PetscErrorCode ierr; 741 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 742 PetscInt nz,*rout,*cout,*diag = a->diag; 743 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 744 745 PetscFunctionBegin; 746 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 747 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 748 tmp = a->solve_work; 749 750 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 751 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 752 753 /* copy the b into temp work space according to permutation */ 754 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 755 756 /* forward solve the U^T */ 757 for (i=0; i<n; i++) { 758 v = aa + diag[i] ; 759 vi = aj + diag[i] + 1; 760 nz = ai[i+1] - diag[i] - 1; 761 s1 = tmp[i]; 762 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 763 while (nz--) { 764 tmp[*vi++ ] -= (*v++)*s1; 765 } 766 tmp[i] = s1; 767 } 768 769 /* backward solve the L^T */ 770 for (i=n-1; i>=0; i--){ 771 v = aa + diag[i] - 1 ; 772 vi = aj + diag[i] - 1 ; 773 nz = diag[i] - ai[i]; 774 s1 = tmp[i]; 775 while (nz--) { 776 tmp[*vi-- ] -= (*v--)*s1; 777 } 778 } 779 780 /* copy tmp into x according to permutation */ 781 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 782 783 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 784 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 785 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 786 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 787 788 ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr); 789 PetscFunctionReturn(0); 790 } 791 792 #undef __FUNCT__ 793 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 794 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 795 { 796 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 797 IS iscol = a->col,isrow = a->row; 798 PetscErrorCode ierr; 799 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 800 PetscInt nz,*rout,*cout,*diag = a->diag; 801 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 802 803 PetscFunctionBegin; 804 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 805 806 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 807 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 808 tmp = a->solve_work; 809 810 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 811 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 812 813 /* copy the b into temp work space according to permutation */ 814 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 815 816 /* forward solve the U^T */ 817 for (i=0; i<n; i++) { 818 v = aa + diag[i] ; 819 vi = aj + diag[i] + 1; 820 nz = ai[i+1] - diag[i] - 1; 821 tmp[i] *= *v++; 822 while (nz--) { 823 tmp[*vi++ ] -= (*v++)*tmp[i]; 824 } 825 } 826 827 /* backward solve the L^T */ 828 for (i=n-1; i>=0; i--){ 829 v = aa + diag[i] - 1 ; 830 vi = aj + diag[i] - 1 ; 831 nz = diag[i] - ai[i]; 832 while (nz--) { 833 tmp[*vi-- ] -= (*v--)*tmp[i]; 834 } 835 } 836 837 /* copy tmp into x according to permutation */ 838 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 839 840 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 841 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 842 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 843 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 844 845 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 846 PetscFunctionReturn(0); 847 } 848 /* ----------------------------------------------------------------*/ 849 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 853 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 854 { 855 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 856 IS isicol; 857 PetscErrorCode ierr; 858 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 859 PetscInt *bi,*cols,nnz,*cols_lvl; 860 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 861 PetscInt i,levels,diagonal_fill; 862 PetscTruth col_identity,row_identity; 863 PetscReal f; 864 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 865 PetscBT lnkbt; 866 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 867 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 868 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 869 870 PetscFunctionBegin; 871 f = info->fill; 872 levels = (PetscInt)info->levels; 873 diagonal_fill = (PetscInt)info->diagonal_fill; 874 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 875 876 /* special case that simply copies fill pattern */ 877 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 878 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 879 if (!levels && row_identity && col_identity) { 880 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 881 (*fact)->factor = FACTOR_LU; 882 b = (Mat_SeqAIJ*)(*fact)->data; 883 if (!b->diag) { 884 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 885 } 886 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 887 b->row = isrow; 888 b->col = iscol; 889 b->icol = isicol; 890 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 891 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 892 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 893 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 894 PetscFunctionReturn(0); 895 } 896 897 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 898 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 899 900 /* get new row pointers */ 901 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 902 bi[0] = 0; 903 /* bdiag is location of diagonal in factor */ 904 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 905 bdiag[0] = 0; 906 907 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 908 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 909 910 /* create a linked list for storing column indices of the active row */ 911 nlnk = n + 1; 912 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 913 914 /* initial FreeSpace size is f*(ai[n]+1) */ 915 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 916 current_space = free_space; 917 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 918 current_space_lvl = free_space_lvl; 919 920 for (i=0; i<n; i++) { 921 nzi = 0; 922 /* copy current row into linked list */ 923 nnz = ai[r[i]+1] - ai[r[i]]; 924 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 925 cols = aj + ai[r[i]]; 926 lnk[i] = -1; /* marker to indicate if diagonal exists */ 927 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 928 nzi += nlnk; 929 930 /* make sure diagonal entry is included */ 931 if (diagonal_fill && lnk[i] == -1) { 932 fm = n; 933 while (lnk[fm] < i) fm = lnk[fm]; 934 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 935 lnk[fm] = i; 936 lnk_lvl[i] = 0; 937 nzi++; dcount++; 938 } 939 940 /* add pivot rows into the active row */ 941 nzbd = 0; 942 prow = lnk[n]; 943 while (prow < i) { 944 nnz = bdiag[prow]; 945 cols = bj_ptr[prow] + nnz + 1; 946 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 947 nnz = bi[prow+1] - bi[prow] - nnz - 1; 948 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 949 nzi += nlnk; 950 prow = lnk[prow]; 951 nzbd++; 952 } 953 bdiag[i] = nzbd; 954 bi[i+1] = bi[i] + nzi; 955 956 /* if free space is not available, make more free space */ 957 if (current_space->local_remaining<nzi) { 958 nnz = nzi*(n - i); /* estimated and max additional space needed */ 959 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 960 ierr = GetMoreSpace(nnz,¤t_space_lvl);CHKERRQ(ierr); 961 reallocs++; 962 } 963 964 /* copy data into free_space and free_space_lvl, then initialize lnk */ 965 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 966 bj_ptr[i] = current_space->array; 967 bjlvl_ptr[i] = current_space_lvl->array; 968 969 /* make sure the active row i has diagonal entry */ 970 if (*(bj_ptr[i]+bdiag[i]) != i) { 971 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 972 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 973 } 974 975 current_space->array += nzi; 976 current_space->local_used += nzi; 977 current_space->local_remaining -= nzi; 978 current_space_lvl->array += nzi; 979 current_space_lvl->local_used += nzi; 980 current_space_lvl->local_remaining -= nzi; 981 } 982 983 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 984 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 985 986 /* destroy list of free space and other temporary arrays */ 987 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 988 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 989 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 990 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 991 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 992 993 #if defined(PETSC_USE_DEBUG) 994 { 995 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 996 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 997 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 998 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr); 999 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 1000 if (diagonal_fill) { 1001 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr); 1002 } 1003 } 1004 #endif 1005 1006 /* put together the new matrix */ 1007 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1008 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1009 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1010 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1011 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1012 b = (Mat_SeqAIJ*)(*fact)->data; 1013 b->freedata = PETSC_TRUE; 1014 b->singlemalloc = PETSC_FALSE; 1015 len = (bi[n] )*sizeof(PetscScalar); 1016 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1017 b->j = bj; 1018 b->i = bi; 1019 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1020 b->diag = bdiag; 1021 b->ilen = 0; 1022 b->imax = 0; 1023 b->row = isrow; 1024 b->col = iscol; 1025 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1026 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1027 b->icol = isicol; 1028 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1029 /* In b structure: Free imax, ilen, old a, old j. 1030 Allocate bdiag, solve_work, new a, new j */ 1031 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1032 b->maxnz = b->nz = bi[n] ; 1033 (*fact)->factor = FACTOR_LU; 1034 (*fact)->info.factor_mallocs = reallocs; 1035 (*fact)->info.fill_ratio_given = f; 1036 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1037 1038 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1039 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1040 1041 PetscFunctionReturn(0); 1042 } 1043 1044 #include "src/mat/impls/sbaij/seq/sbaij.h" 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1047 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1048 { 1049 Mat C = *B; 1050 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1051 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1052 IS ip=b->row; 1053 PetscErrorCode ierr; 1054 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1055 PetscInt *ai=a->i,*aj=a->j; 1056 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1057 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1058 PetscReal zeropivot,rs,shiftnz; 1059 PetscTruth shiftpd; 1060 ChShift_Ctx sctx; 1061 PetscInt newshift; 1062 1063 PetscFunctionBegin; 1064 shiftnz = info->shiftnz; 1065 shiftpd = info->shiftpd; 1066 zeropivot = info->zeropivot; 1067 1068 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1069 1070 /* initialization */ 1071 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1072 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1073 jl = il + mbs; 1074 rtmp = (MatScalar*)(jl + mbs); 1075 1076 sctx.shift_amount = 0; 1077 sctx.nshift = 0; 1078 do { 1079 sctx.chshift = PETSC_FALSE; 1080 for (i=0; i<mbs; i++) { 1081 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1082 } 1083 1084 for (k = 0; k<mbs; k++){ 1085 bval = ba + bi[k]; 1086 /* initialize k-th row by the perm[k]-th row of A */ 1087 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1088 for (j = jmin; j < jmax; j++){ 1089 col = rip[aj[j]]; 1090 if (col >= k){ /* only take upper triangular entry */ 1091 rtmp[col] = aa[j]; 1092 *bval++ = 0.0; /* for in-place factorization */ 1093 } 1094 } 1095 /* shift the diagonal of the matrix */ 1096 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1097 1098 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1099 dk = rtmp[k]; 1100 i = jl[k]; /* first row to be added to k_th row */ 1101 1102 while (i < k){ 1103 nexti = jl[i]; /* next row to be added to k_th row */ 1104 1105 /* compute multiplier, update diag(k) and U(i,k) */ 1106 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1107 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1108 dk += uikdi*ba[ili]; 1109 ba[ili] = uikdi; /* -U(i,k) */ 1110 1111 /* add multiple of row i to k-th row */ 1112 jmin = ili + 1; jmax = bi[i+1]; 1113 if (jmin < jmax){ 1114 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1115 /* update il and jl for row i */ 1116 il[i] = jmin; 1117 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1118 } 1119 i = nexti; 1120 } 1121 1122 /* shift the diagonals when zero pivot is detected */ 1123 /* compute rs=sum of abs(off-diagonal) */ 1124 rs = 0.0; 1125 jmin = bi[k]+1; 1126 nz = bi[k+1] - jmin; 1127 if (nz){ 1128 bcol = bj + jmin; 1129 while (nz--){ 1130 rs += PetscAbsScalar(rtmp[*bcol]); 1131 bcol++; 1132 } 1133 } 1134 1135 sctx.rs = rs; 1136 sctx.pv = dk; 1137 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1138 if (newshift == 1){ 1139 break; /* sctx.shift_amount is updated */ 1140 } else if (newshift == -1){ 1141 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1142 } 1143 1144 /* copy data into U(k,:) */ 1145 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1146 jmin = bi[k]+1; jmax = bi[k+1]; 1147 if (jmin < jmax) { 1148 for (j=jmin; j<jmax; j++){ 1149 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1150 } 1151 /* add the k-th row into il and jl */ 1152 il[k] = jmin; 1153 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1154 } 1155 } 1156 } while (sctx.chshift); 1157 ierr = PetscFree(il);CHKERRQ(ierr); 1158 1159 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1160 C->factor = FACTOR_CHOLESKY; 1161 C->assembled = PETSC_TRUE; 1162 C->preallocated = PETSC_TRUE; 1163 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1164 if (sctx.nshift){ 1165 if (shiftnz) { 1166 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1167 } else if (shiftpd) { 1168 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1169 } 1170 } 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1176 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1177 { 1178 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1179 Mat_SeqSBAIJ *b; 1180 Mat B; 1181 PetscErrorCode ierr; 1182 PetscTruth perm_identity; 1183 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1184 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1185 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1186 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1187 PetscReal fill=info->fill,levels=info->levels; 1188 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1189 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1190 PetscBT lnkbt; 1191 1192 PetscFunctionBegin; 1193 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1194 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1195 1196 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1197 ui[0] = 0; 1198 1199 /* special case that simply copies fill pattern */ 1200 if (!levels && perm_identity) { 1201 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1202 for (i=0; i<am; i++) { 1203 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1204 } 1205 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1206 cols = uj; 1207 for (i=0; i<am; i++) { 1208 aj = a->j + a->diag[i]; 1209 ncols = ui[i+1] - ui[i]; 1210 for (j=0; j<ncols; j++) *cols++ = *aj++; 1211 } 1212 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1213 /* initialization */ 1214 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1215 1216 /* jl: linked list for storing indices of the pivot rows 1217 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1218 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1219 il = jl + am; 1220 uj_ptr = (PetscInt**)(il + am); 1221 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1222 for (i=0; i<am; i++){ 1223 jl[i] = am; il[i] = 0; 1224 } 1225 1226 /* create and initialize a linked list for storing column indices of the active row k */ 1227 nlnk = am + 1; 1228 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1229 1230 /* initial FreeSpace size is fill*(ai[am]+1) */ 1231 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1232 current_space = free_space; 1233 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1234 current_space_lvl = free_space_lvl; 1235 1236 for (k=0; k<am; k++){ /* for each active row k */ 1237 /* initialize lnk by the column indices of row rip[k] of A */ 1238 nzk = 0; 1239 ncols = ai[rip[k]+1] - ai[rip[k]]; 1240 ncols_upper = 0; 1241 for (j=0; j<ncols; j++){ 1242 i = *(aj + ai[rip[k]] + j); 1243 if (rip[i] >= k){ /* only take upper triangular entry */ 1244 ajtmp[ncols_upper] = i; 1245 ncols_upper++; 1246 } 1247 } 1248 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1249 nzk += nlnk; 1250 1251 /* update lnk by computing fill-in for each pivot row to be merged in */ 1252 prow = jl[k]; /* 1st pivot row */ 1253 1254 while (prow < k){ 1255 nextprow = jl[prow]; 1256 1257 /* merge prow into k-th row */ 1258 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1259 jmax = ui[prow+1]; 1260 ncols = jmax-jmin; 1261 i = jmin - ui[prow]; 1262 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1263 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1264 j = *(uj - 1); 1265 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1266 nzk += nlnk; 1267 1268 /* update il and jl for prow */ 1269 if (jmin < jmax){ 1270 il[prow] = jmin; 1271 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1272 } 1273 prow = nextprow; 1274 } 1275 1276 /* if free space is not available, make more free space */ 1277 if (current_space->local_remaining<nzk) { 1278 i = am - k + 1; /* num of unfactored rows */ 1279 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1280 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1281 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1282 reallocs++; 1283 } 1284 1285 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1286 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1287 1288 /* add the k-th row into il and jl */ 1289 if (nzk > 1){ 1290 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1291 jl[k] = jl[i]; jl[i] = k; 1292 il[k] = ui[k] + 1; 1293 } 1294 uj_ptr[k] = current_space->array; 1295 uj_lvl_ptr[k] = current_space_lvl->array; 1296 1297 current_space->array += nzk; 1298 current_space->local_used += nzk; 1299 current_space->local_remaining -= nzk; 1300 1301 current_space_lvl->array += nzk; 1302 current_space_lvl->local_used += nzk; 1303 current_space_lvl->local_remaining -= nzk; 1304 1305 ui[k+1] = ui[k] + nzk; 1306 } 1307 1308 #if defined(PETSC_USE_DEBUG) 1309 if (ai[am] != 0) { 1310 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1311 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1312 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1313 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1314 } else { 1315 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1316 } 1317 #endif 1318 1319 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1320 ierr = PetscFree(jl);CHKERRQ(ierr); 1321 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1322 1323 /* destroy list of free space and other temporary array(s) */ 1324 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1325 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1326 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1327 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1328 1329 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1330 1331 /* put together the new matrix in MATSEQSBAIJ format */ 1332 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1333 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1334 B = *fact; 1335 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1336 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1337 1338 b = (Mat_SeqSBAIJ*)B->data; 1339 b->singlemalloc = PETSC_FALSE; 1340 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1341 b->j = uj; 1342 b->i = ui; 1343 b->diag = 0; 1344 b->ilen = 0; 1345 b->imax = 0; 1346 b->row = perm; 1347 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1348 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1349 b->icol = perm; 1350 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1351 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1352 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1353 b->maxnz = b->nz = ui[am]; 1354 1355 B->factor = FACTOR_CHOLESKY; 1356 B->info.factor_mallocs = reallocs; 1357 B->info.fill_ratio_given = fill; 1358 if (ai[am] != 0) { 1359 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1360 } else { 1361 B->info.fill_ratio_needed = 0.0; 1362 } 1363 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1364 if (perm_identity){ 1365 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1366 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1367 } 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNCT__ 1372 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1373 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1374 { 1375 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1376 Mat_SeqSBAIJ *b; 1377 Mat B; 1378 PetscErrorCode ierr; 1379 PetscTruth perm_identity; 1380 PetscReal fill = info->fill; 1381 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1382 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1383 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1384 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1385 PetscBT lnkbt; 1386 IS iperm; 1387 1388 PetscFunctionBegin; 1389 /* check whether perm is the identity mapping */ 1390 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1391 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1392 1393 if (!perm_identity){ 1394 /* check if perm is symmetric! */ 1395 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1396 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1397 for (i=0; i<am; i++) { 1398 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1399 } 1400 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1401 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1402 } 1403 1404 /* initialization */ 1405 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1406 ui[0] = 0; 1407 1408 /* jl: linked list for storing indices of the pivot rows 1409 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1410 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1411 il = jl + am; 1412 cols = il + am; 1413 ui_ptr = (PetscInt**)(cols + am); 1414 for (i=0; i<am; i++){ 1415 jl[i] = am; il[i] = 0; 1416 } 1417 1418 /* create and initialize a linked list for storing column indices of the active row k */ 1419 nlnk = am + 1; 1420 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1421 1422 /* initial FreeSpace size is fill*(ai[am]+1) */ 1423 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1424 current_space = free_space; 1425 1426 for (k=0; k<am; k++){ /* for each active row k */ 1427 /* initialize lnk by the column indices of row rip[k] of A */ 1428 nzk = 0; 1429 ncols = ai[rip[k]+1] - ai[rip[k]]; 1430 ncols_upper = 0; 1431 for (j=0; j<ncols; j++){ 1432 i = rip[*(aj + ai[rip[k]] + j)]; 1433 if (i >= k){ /* only take upper triangular entry */ 1434 cols[ncols_upper] = i; 1435 ncols_upper++; 1436 } 1437 } 1438 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1439 nzk += nlnk; 1440 1441 /* update lnk by computing fill-in for each pivot row to be merged in */ 1442 prow = jl[k]; /* 1st pivot row */ 1443 1444 while (prow < k){ 1445 nextprow = jl[prow]; 1446 /* merge prow into k-th row */ 1447 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1448 jmax = ui[prow+1]; 1449 ncols = jmax-jmin; 1450 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1451 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1452 nzk += nlnk; 1453 1454 /* update il and jl for prow */ 1455 if (jmin < jmax){ 1456 il[prow] = jmin; 1457 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1458 } 1459 prow = nextprow; 1460 } 1461 1462 /* if free space is not available, make more free space */ 1463 if (current_space->local_remaining<nzk) { 1464 i = am - k + 1; /* num of unfactored rows */ 1465 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1466 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1467 reallocs++; 1468 } 1469 1470 /* copy data into free space, then initialize lnk */ 1471 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1472 1473 /* add the k-th row into il and jl */ 1474 if (nzk-1 > 0){ 1475 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1476 jl[k] = jl[i]; jl[i] = k; 1477 il[k] = ui[k] + 1; 1478 } 1479 ui_ptr[k] = current_space->array; 1480 current_space->array += nzk; 1481 current_space->local_used += nzk; 1482 current_space->local_remaining -= nzk; 1483 1484 ui[k+1] = ui[k] + nzk; 1485 } 1486 1487 #if defined(PETSC_USE_DEBUG) 1488 if (ai[am] != 0) { 1489 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1490 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1491 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1492 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1493 } else { 1494 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1495 } 1496 #endif 1497 1498 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1499 ierr = PetscFree(jl);CHKERRQ(ierr); 1500 1501 /* destroy list of free space and other temporary array(s) */ 1502 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1503 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1504 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1505 1506 /* put together the new matrix in MATSEQSBAIJ format */ 1507 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1508 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1509 B = *fact; 1510 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1511 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1512 1513 b = (Mat_SeqSBAIJ*)B->data; 1514 b->singlemalloc = PETSC_FALSE; 1515 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1516 b->j = uj; 1517 b->i = ui; 1518 b->diag = 0; 1519 b->ilen = 0; 1520 b->imax = 0; 1521 b->row = perm; 1522 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1523 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1524 b->icol = perm; 1525 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1526 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1527 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1528 b->maxnz = b->nz = ui[am]; 1529 1530 B->factor = FACTOR_CHOLESKY; 1531 B->info.factor_mallocs = reallocs; 1532 B->info.fill_ratio_given = fill; 1533 if (ai[am] != 0) { 1534 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1535 } else { 1536 B->info.fill_ratio_needed = 0.0; 1537 } 1538 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1539 if (perm_identity){ 1540 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1541 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1542 } 1543 PetscFunctionReturn(0); 1544 } 1545