1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 23 24 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 25 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 26 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->m; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->freedata = PETSC_TRUE; 231 b->sorted = PETSC_FALSE; 232 b->singlemalloc = PETSC_FALSE; 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 247 248 af = ((double)b->nz)/((double)a->nz) + .001; 249 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr); 250 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 251 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 252 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 253 254 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 255 256 PetscFunctionReturn(0); 257 #endif 258 } 259 260 #undef __FUNCT__ 261 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 262 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 263 { 264 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 265 IS isicol; 266 PetscErrorCode ierr; 267 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 268 PetscInt *bi,*bj,*ajtmp; 269 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 270 PetscReal f; 271 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 272 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 273 PetscBT lnkbt; 274 275 PetscFunctionBegin; 276 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 277 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 278 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 279 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 280 281 /* get new row pointers */ 282 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 283 bi[0] = 0; 284 285 /* bdiag is location of diagonal in factor */ 286 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 287 bdiag[0] = 0; 288 289 /* linked list for storing column indices of the active row */ 290 nlnk = n + 1; 291 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 292 293 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 294 im = cols + n; 295 bi_ptr = (PetscInt**)(im + n); 296 297 /* initial FreeSpace size is f*(ai[n]+1) */ 298 f = info->fill; 299 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 300 current_space = free_space; 301 302 for (i=0; i<n; i++) { 303 /* copy previous fill into linked list */ 304 nzi = 0; 305 nnz = ai[r[i]+1] - ai[r[i]]; 306 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 307 ajtmp = aj + ai[r[i]]; 308 for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */ 309 ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 310 nzi += nlnk; 311 312 /* add pivot rows into linked list */ 313 row = lnk[n]; 314 while (row < i) { 315 nzbd = bdiag[row] - bi[row] + 1; 316 ajtmp = bi_ptr[row] + nzbd; 317 nnz = im[row] - nzbd; /* num of columns with row<indices<=i */ 318 im[row] = nzbd; 319 ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr); 320 nzi += nlnk; 321 im[row] += nzbd; /* update im[row]: num of cols with index<=i */ 322 323 row = lnk[row]; 324 } 325 326 bi[i+1] = bi[i] + nzi; 327 im[i] = nzi; 328 329 /* mark bdiag */ 330 nzbd = 0; 331 nnz = nzi; 332 k = lnk[n]; 333 while (nnz-- && k < i){ 334 nzbd++; 335 k = lnk[k]; 336 } 337 bdiag[i] = bi[i] + nzbd; 338 339 /* if free space is not available, make more free space */ 340 if (current_space->local_remaining<nzi) { 341 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 342 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 343 reallocs++; 344 } 345 346 /* copy data into free space, then initialize lnk */ 347 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 348 bi_ptr[i] = current_space->array; 349 current_space->array += nzi; 350 current_space->local_used += nzi; 351 current_space->local_remaining -= nzi; 352 } 353 #if defined(PETSC_USE_DEBUG) 354 if (ai[n] != 0) { 355 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 356 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 357 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %G or use \n",af));CHKERRQ(ierr); 358 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 359 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 360 } else { 361 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr); 362 } 363 #endif 364 365 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 366 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 367 368 /* destroy list of free space and other temporary array(s) */ 369 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 370 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 371 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 372 ierr = PetscFree(cols);CHKERRQ(ierr); 373 374 /* put together the new matrix */ 375 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 376 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 377 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 378 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 379 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 380 b = (Mat_SeqAIJ*)(*B)->data; 381 b->freedata = PETSC_TRUE; 382 b->singlemalloc = PETSC_FALSE; 383 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 384 b->j = bj; 385 b->i = bi; 386 b->diag = bdiag; 387 b->ilen = 0; 388 b->imax = 0; 389 b->row = isrow; 390 b->col = iscol; 391 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 392 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 393 b->icol = isicol; 394 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 395 396 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 397 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 398 b->maxnz = b->nz = bi[n] ; 399 400 (*B)->factor = FACTOR_LU; 401 (*B)->info.factor_mallocs = reallocs; 402 (*B)->info.fill_ratio_given = f; 403 404 if (ai[n] != 0) { 405 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 406 } else { 407 (*B)->info.fill_ratio_needed = 0.0; 408 } 409 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 410 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 411 PetscFunctionReturn(0); 412 } 413 414 /* ----------------------------------------------------------- */ 415 #undef __FUNCT__ 416 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 417 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 418 { 419 Mat C=*B; 420 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 421 IS isrow = b->row,isicol = b->icol; 422 PetscErrorCode ierr; 423 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 424 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 425 PetscInt *diag_offset = b->diag,diag,*pj; 426 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 427 PetscScalar d; 428 PetscReal rs; 429 LUShift_Ctx sctx; 430 PetscInt newshift; 431 432 PetscFunctionBegin; 433 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 434 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 435 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 436 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 437 rtmps = rtmp; ics = ic; 438 439 if (!a->diag) { 440 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 441 } 442 /* if both shift schemes are chosen by user, only use info->shiftpd */ 443 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 444 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 445 PetscInt *aai = a->i,*ddiag = a->diag; 446 sctx.shift_top = 0; 447 for (i=0; i<n; i++) { 448 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 449 d = (a->a)[ddiag[i]]; 450 rs = -PetscAbsScalar(d) - PetscRealPart(d); 451 v = a->a+aai[i]; 452 nz = aai[i+1] - aai[i]; 453 for (j=0; j<nz; j++) 454 rs += PetscAbsScalar(v[j]); 455 if (rs>sctx.shift_top) sctx.shift_top = rs; 456 } 457 if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12; 458 sctx.shift_top *= 1.1; 459 sctx.nshift_max = 5; 460 sctx.shift_lo = 0.; 461 sctx.shift_hi = 1.; 462 } 463 464 sctx.shift_amount = 0; 465 sctx.nshift = 0; 466 do { 467 sctx.lushift = PETSC_FALSE; 468 for (i=0; i<n; i++){ 469 nz = bi[i+1] - bi[i]; 470 bjtmp = bj + bi[i]; 471 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 472 473 /* load in initial (unfactored row) */ 474 nz = a->i[r[i]+1] - a->i[r[i]]; 475 ajtmp = a->j + a->i[r[i]]; 476 v = a->a + a->i[r[i]]; 477 for (j=0; j<nz; j++) { 478 rtmp[ics[ajtmp[j]]] = v[j]; 479 } 480 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 481 482 row = *bjtmp++; 483 while (row < i) { 484 pc = rtmp + row; 485 if (*pc != 0.0) { 486 pv = b->a + diag_offset[row]; 487 pj = b->j + diag_offset[row] + 1; 488 multiplier = *pc / *pv++; 489 *pc = multiplier; 490 nz = bi[row+1] - diag_offset[row] - 1; 491 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 492 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 493 } 494 row = *bjtmp++; 495 } 496 /* finished row so stick it into b->a */ 497 pv = b->a + bi[i] ; 498 pj = b->j + bi[i] ; 499 nz = bi[i+1] - bi[i]; 500 diag = diag_offset[i] - bi[i]; 501 rs = 0.0; 502 for (j=0; j<nz; j++) { 503 pv[j] = rtmps[pj[j]]; 504 if (j != diag) rs += PetscAbsScalar(pv[j]); 505 } 506 507 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 508 sctx.rs = rs; 509 sctx.pv = pv[diag]; 510 ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 511 if (newshift == 1){ 512 break; /* sctx.shift_amount is updated */ 513 } else if (newshift == -1){ 514 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 515 } 516 } 517 518 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 519 /* 520 * if no shift in this attempt & shifting & started shifting & can refine, 521 * then try lower shift 522 */ 523 sctx.shift_hi = info->shift_fraction; 524 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 525 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 526 sctx.lushift = PETSC_TRUE; 527 sctx.nshift++; 528 } 529 } while (sctx.lushift); 530 531 /* invert diagonal entries for simplier triangular solves */ 532 for (i=0; i<n; i++) { 533 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 534 } 535 536 ierr = PetscFree(rtmp);CHKERRQ(ierr); 537 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 538 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 539 C->factor = FACTOR_LU; 540 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 541 C->assembled = PETSC_TRUE; 542 ierr = PetscLogFlops(C->n);CHKERRQ(ierr); 543 if (sctx.nshift){ 544 if (info->shiftnz) { 545 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 546 } else if (info->shiftpd) { 547 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr); 548 } 549 } 550 PetscFunctionReturn(0); 551 } 552 553 #undef __FUNCT__ 554 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 555 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 556 { 557 PetscFunctionBegin; 558 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 559 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 560 PetscFunctionReturn(0); 561 } 562 563 564 /* ----------------------------------------------------------- */ 565 #undef __FUNCT__ 566 #define __FUNCT__ "MatLUFactor_SeqAIJ" 567 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 568 { 569 PetscErrorCode ierr; 570 Mat C; 571 572 PetscFunctionBegin; 573 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 574 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 575 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 576 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 577 PetscFunctionReturn(0); 578 } 579 /* ----------------------------------------------------------- */ 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatSolve_SeqAIJ" 582 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 583 { 584 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 585 IS iscol = a->col,isrow = a->row; 586 PetscErrorCode ierr; 587 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 588 PetscInt nz,*rout,*cout; 589 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 590 591 PetscFunctionBegin; 592 if (!n) PetscFunctionReturn(0); 593 594 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 595 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 596 tmp = a->solve_work; 597 598 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 599 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 600 601 /* forward solve the lower triangular */ 602 tmp[0] = b[*r++]; 603 tmps = tmp; 604 for (i=1; i<n; i++) { 605 v = aa + ai[i] ; 606 vi = aj + ai[i] ; 607 nz = a->diag[i] - ai[i]; 608 sum = b[*r++]; 609 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 610 tmp[i] = sum; 611 } 612 613 /* backward solve the upper triangular */ 614 for (i=n-1; i>=0; i--){ 615 v = aa + a->diag[i] + 1; 616 vi = aj + a->diag[i] + 1; 617 nz = ai[i+1] - a->diag[i] - 1; 618 sum = tmp[i]; 619 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 620 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 621 } 622 623 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 624 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 625 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 626 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 627 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 /* ----------------------------------------------------------- */ 632 #undef __FUNCT__ 633 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 634 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 635 { 636 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 637 PetscErrorCode ierr; 638 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 639 PetscScalar *x,*b,*aa = a->a; 640 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 641 PetscInt adiag_i,i,*vi,nz,ai_i; 642 PetscScalar *v,sum; 643 #endif 644 645 PetscFunctionBegin; 646 if (!n) PetscFunctionReturn(0); 647 648 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 649 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 650 651 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 652 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 653 #else 654 /* forward solve the lower triangular */ 655 x[0] = b[0]; 656 for (i=1; i<n; i++) { 657 ai_i = ai[i]; 658 v = aa + ai_i; 659 vi = aj + ai_i; 660 nz = adiag[i] - ai_i; 661 sum = b[i]; 662 while (nz--) sum -= *v++ * x[*vi++]; 663 x[i] = sum; 664 } 665 666 /* backward solve the upper triangular */ 667 for (i=n-1; i>=0; i--){ 668 adiag_i = adiag[i]; 669 v = aa + adiag_i + 1; 670 vi = aj + adiag_i + 1; 671 nz = ai[i+1] - adiag_i - 1; 672 sum = x[i]; 673 while (nz--) sum -= *v++ * x[*vi++]; 674 x[i] = sum*aa[adiag_i]; 675 } 676 #endif 677 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 678 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 679 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 680 PetscFunctionReturn(0); 681 } 682 683 #undef __FUNCT__ 684 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 685 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 686 { 687 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 688 IS iscol = a->col,isrow = a->row; 689 PetscErrorCode ierr; 690 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 691 PetscInt nz,*rout,*cout; 692 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 693 694 PetscFunctionBegin; 695 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 696 697 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 698 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 699 tmp = a->solve_work; 700 701 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 702 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 703 704 /* forward solve the lower triangular */ 705 tmp[0] = b[*r++]; 706 for (i=1; i<n; i++) { 707 v = aa + ai[i] ; 708 vi = aj + ai[i] ; 709 nz = a->diag[i] - ai[i]; 710 sum = b[*r++]; 711 while (nz--) sum -= *v++ * tmp[*vi++ ]; 712 tmp[i] = sum; 713 } 714 715 /* backward solve the upper triangular */ 716 for (i=n-1; i>=0; i--){ 717 v = aa + a->diag[i] + 1; 718 vi = aj + a->diag[i] + 1; 719 nz = ai[i+1] - a->diag[i] - 1; 720 sum = tmp[i]; 721 while (nz--) sum -= *v++ * tmp[*vi++ ]; 722 tmp[i] = sum*aa[a->diag[i]]; 723 x[*c--] += tmp[i]; 724 } 725 726 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 727 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 728 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 729 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 730 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 731 732 PetscFunctionReturn(0); 733 } 734 /* -------------------------------------------------------------------*/ 735 #undef __FUNCT__ 736 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 737 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 738 { 739 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 740 IS iscol = a->col,isrow = a->row; 741 PetscErrorCode ierr; 742 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 743 PetscInt nz,*rout,*cout,*diag = a->diag; 744 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 745 746 PetscFunctionBegin; 747 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 748 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 749 tmp = a->solve_work; 750 751 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 752 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 753 754 /* copy the b into temp work space according to permutation */ 755 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 756 757 /* forward solve the U^T */ 758 for (i=0; i<n; i++) { 759 v = aa + diag[i] ; 760 vi = aj + diag[i] + 1; 761 nz = ai[i+1] - diag[i] - 1; 762 s1 = tmp[i]; 763 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 764 while (nz--) { 765 tmp[*vi++ ] -= (*v++)*s1; 766 } 767 tmp[i] = s1; 768 } 769 770 /* backward solve the L^T */ 771 for (i=n-1; i>=0; i--){ 772 v = aa + diag[i] - 1 ; 773 vi = aj + diag[i] - 1 ; 774 nz = diag[i] - ai[i]; 775 s1 = tmp[i]; 776 while (nz--) { 777 tmp[*vi-- ] -= (*v--)*s1; 778 } 779 } 780 781 /* copy tmp into x according to permutation */ 782 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 783 784 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 785 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 786 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 787 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 788 789 ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr); 790 PetscFunctionReturn(0); 791 } 792 793 #undef __FUNCT__ 794 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 795 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 796 { 797 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 798 IS iscol = a->col,isrow = a->row; 799 PetscErrorCode ierr; 800 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 801 PetscInt nz,*rout,*cout,*diag = a->diag; 802 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 803 804 PetscFunctionBegin; 805 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 806 807 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 808 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 809 tmp = a->solve_work; 810 811 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 812 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 813 814 /* copy the b into temp work space according to permutation */ 815 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 816 817 /* forward solve the U^T */ 818 for (i=0; i<n; i++) { 819 v = aa + diag[i] ; 820 vi = aj + diag[i] + 1; 821 nz = ai[i+1] - diag[i] - 1; 822 tmp[i] *= *v++; 823 while (nz--) { 824 tmp[*vi++ ] -= (*v++)*tmp[i]; 825 } 826 } 827 828 /* backward solve the L^T */ 829 for (i=n-1; i>=0; i--){ 830 v = aa + diag[i] - 1 ; 831 vi = aj + diag[i] - 1 ; 832 nz = diag[i] - ai[i]; 833 while (nz--) { 834 tmp[*vi-- ] -= (*v--)*tmp[i]; 835 } 836 } 837 838 /* copy tmp into x according to permutation */ 839 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 840 841 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 842 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 843 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 844 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 845 846 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 847 PetscFunctionReturn(0); 848 } 849 /* ----------------------------------------------------------------*/ 850 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 851 852 #undef __FUNCT__ 853 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 854 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 855 { 856 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 857 IS isicol; 858 PetscErrorCode ierr; 859 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 860 PetscInt *bi,*cols,nnz,*cols_lvl; 861 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 862 PetscInt i,levels,diagonal_fill; 863 PetscTruth col_identity,row_identity; 864 PetscReal f; 865 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 866 PetscBT lnkbt; 867 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 868 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 869 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 870 871 PetscFunctionBegin; 872 f = info->fill; 873 levels = (PetscInt)info->levels; 874 diagonal_fill = (PetscInt)info->diagonal_fill; 875 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 876 877 /* special case that simply copies fill pattern */ 878 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 879 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 880 if (!levels && row_identity && col_identity) { 881 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 882 (*fact)->factor = FACTOR_LU; 883 b = (Mat_SeqAIJ*)(*fact)->data; 884 if (!b->diag) { 885 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 886 } 887 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 888 b->row = isrow; 889 b->col = iscol; 890 b->icol = isicol; 891 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 892 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 893 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 894 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 895 PetscFunctionReturn(0); 896 } 897 898 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 899 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 900 901 /* get new row pointers */ 902 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 903 bi[0] = 0; 904 /* bdiag is location of diagonal in factor */ 905 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 906 bdiag[0] = 0; 907 908 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 909 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 910 911 /* create a linked list for storing column indices of the active row */ 912 nlnk = n + 1; 913 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 914 915 /* initial FreeSpace size is f*(ai[n]+1) */ 916 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 917 current_space = free_space; 918 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 919 current_space_lvl = free_space_lvl; 920 921 for (i=0; i<n; i++) { 922 nzi = 0; 923 /* copy current row into linked list */ 924 nnz = ai[r[i]+1] - ai[r[i]]; 925 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 926 cols = aj + ai[r[i]]; 927 lnk[i] = -1; /* marker to indicate if diagonal exists */ 928 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 929 nzi += nlnk; 930 931 /* make sure diagonal entry is included */ 932 if (diagonal_fill && lnk[i] == -1) { 933 fm = n; 934 while (lnk[fm] < i) fm = lnk[fm]; 935 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 936 lnk[fm] = i; 937 lnk_lvl[i] = 0; 938 nzi++; dcount++; 939 } 940 941 /* add pivot rows into the active row */ 942 nzbd = 0; 943 prow = lnk[n]; 944 while (prow < i) { 945 nnz = bdiag[prow]; 946 cols = bj_ptr[prow] + nnz + 1; 947 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 948 nnz = bi[prow+1] - bi[prow] - nnz - 1; 949 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 950 nzi += nlnk; 951 prow = lnk[prow]; 952 nzbd++; 953 } 954 bdiag[i] = nzbd; 955 bi[i+1] = bi[i] + nzi; 956 957 /* if free space is not available, make more free space */ 958 if (current_space->local_remaining<nzi) { 959 nnz = nzi*(n - i); /* estimated and max additional space needed */ 960 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 961 ierr = GetMoreSpace(nnz,¤t_space_lvl);CHKERRQ(ierr); 962 reallocs++; 963 } 964 965 /* copy data into free_space and free_space_lvl, then initialize lnk */ 966 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 967 bj_ptr[i] = current_space->array; 968 bjlvl_ptr[i] = current_space_lvl->array; 969 970 /* make sure the active row i has diagonal entry */ 971 if (*(bj_ptr[i]+bdiag[i]) != i) { 972 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 973 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 974 } 975 976 current_space->array += nzi; 977 current_space->local_used += nzi; 978 current_space->local_remaining -= nzi; 979 current_space_lvl->array += nzi; 980 current_space_lvl->local_used += nzi; 981 current_space_lvl->local_remaining -= nzi; 982 } 983 984 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 985 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 986 987 /* destroy list of free space and other temporary arrays */ 988 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 989 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 990 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 991 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 992 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 993 994 #if defined(PETSC_USE_DEBUG) 995 { 996 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 997 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 998 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 999 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr); 1000 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 1001 if (diagonal_fill) { 1002 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr); 1003 } 1004 } 1005 #endif 1006 1007 /* put together the new matrix */ 1008 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1009 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1010 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1011 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1012 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1013 b = (Mat_SeqAIJ*)(*fact)->data; 1014 b->freedata = PETSC_TRUE; 1015 b->singlemalloc = PETSC_FALSE; 1016 len = (bi[n] )*sizeof(PetscScalar); 1017 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1018 b->j = bj; 1019 b->i = bi; 1020 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1021 b->diag = bdiag; 1022 b->ilen = 0; 1023 b->imax = 0; 1024 b->row = isrow; 1025 b->col = iscol; 1026 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1027 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1028 b->icol = isicol; 1029 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1030 /* In b structure: Free imax, ilen, old a, old j. 1031 Allocate bdiag, solve_work, new a, new j */ 1032 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1033 b->maxnz = b->nz = bi[n] ; 1034 (*fact)->factor = FACTOR_LU; 1035 (*fact)->info.factor_mallocs = reallocs; 1036 (*fact)->info.fill_ratio_given = f; 1037 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1038 1039 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1040 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1041 1042 PetscFunctionReturn(0); 1043 } 1044 1045 #include "src/mat/impls/sbaij/seq/sbaij.h" 1046 #undef __FUNCT__ 1047 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1048 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1049 { 1050 Mat C = *B; 1051 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1052 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1053 IS ip=b->row; 1054 PetscErrorCode ierr; 1055 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1056 PetscInt *ai=a->i,*aj=a->j; 1057 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1058 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1059 PetscReal zeropivot,rs,shiftnz; 1060 PetscTruth shiftpd; 1061 ChShift_Ctx sctx; 1062 PetscInt newshift; 1063 1064 PetscFunctionBegin; 1065 shiftnz = info->shiftnz; 1066 shiftpd = info->shiftpd; 1067 zeropivot = info->zeropivot; 1068 1069 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1070 1071 /* initialization */ 1072 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1073 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1074 jl = il + mbs; 1075 rtmp = (MatScalar*)(jl + mbs); 1076 1077 sctx.shift_amount = 0; 1078 sctx.nshift = 0; 1079 do { 1080 sctx.chshift = PETSC_FALSE; 1081 for (i=0; i<mbs; i++) { 1082 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1083 } 1084 1085 for (k = 0; k<mbs; k++){ 1086 bval = ba + bi[k]; 1087 /* initialize k-th row by the perm[k]-th row of A */ 1088 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1089 for (j = jmin; j < jmax; j++){ 1090 col = rip[aj[j]]; 1091 if (col >= k){ /* only take upper triangular entry */ 1092 rtmp[col] = aa[j]; 1093 *bval++ = 0.0; /* for in-place factorization */ 1094 } 1095 } 1096 /* shift the diagonal of the matrix */ 1097 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1098 1099 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1100 dk = rtmp[k]; 1101 i = jl[k]; /* first row to be added to k_th row */ 1102 1103 while (i < k){ 1104 nexti = jl[i]; /* next row to be added to k_th row */ 1105 1106 /* compute multiplier, update diag(k) and U(i,k) */ 1107 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1108 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1109 dk += uikdi*ba[ili]; 1110 ba[ili] = uikdi; /* -U(i,k) */ 1111 1112 /* add multiple of row i to k-th row */ 1113 jmin = ili + 1; jmax = bi[i+1]; 1114 if (jmin < jmax){ 1115 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1116 /* update il and jl for row i */ 1117 il[i] = jmin; 1118 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1119 } 1120 i = nexti; 1121 } 1122 1123 /* shift the diagonals when zero pivot is detected */ 1124 /* compute rs=sum of abs(off-diagonal) */ 1125 rs = 0.0; 1126 jmin = bi[k]+1; 1127 nz = bi[k+1] - jmin; 1128 if (nz){ 1129 bcol = bj + jmin; 1130 while (nz--){ 1131 rs += PetscAbsScalar(rtmp[*bcol]); 1132 bcol++; 1133 } 1134 } 1135 1136 sctx.rs = rs; 1137 sctx.pv = dk; 1138 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1139 if (newshift == 1){ 1140 break; /* sctx.shift_amount is updated */ 1141 } else if (newshift == -1){ 1142 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1143 } 1144 1145 /* copy data into U(k,:) */ 1146 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1147 jmin = bi[k]+1; jmax = bi[k+1]; 1148 if (jmin < jmax) { 1149 for (j=jmin; j<jmax; j++){ 1150 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1151 } 1152 /* add the k-th row into il and jl */ 1153 il[k] = jmin; 1154 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1155 } 1156 } 1157 } while (sctx.chshift); 1158 ierr = PetscFree(il);CHKERRQ(ierr); 1159 1160 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1161 C->factor = FACTOR_CHOLESKY; 1162 C->assembled = PETSC_TRUE; 1163 C->preallocated = PETSC_TRUE; 1164 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1165 if (sctx.nshift){ 1166 if (shiftnz) { 1167 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1168 } else if (shiftpd) { 1169 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1170 } 1171 } 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1177 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1178 { 1179 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1180 Mat_SeqSBAIJ *b; 1181 Mat B; 1182 PetscErrorCode ierr; 1183 PetscTruth perm_identity; 1184 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1185 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1186 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1187 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1188 PetscReal fill=info->fill,levels=info->levels; 1189 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1190 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1191 PetscBT lnkbt; 1192 1193 PetscFunctionBegin; 1194 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1195 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1196 1197 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1198 ui[0] = 0; 1199 1200 /* special case that simply copies fill pattern */ 1201 if (!levels && perm_identity) { 1202 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1203 for (i=0; i<am; i++) { 1204 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1205 } 1206 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1207 cols = uj; 1208 for (i=0; i<am; i++) { 1209 aj = a->j + a->diag[i]; 1210 ncols = ui[i+1] - ui[i]; 1211 for (j=0; j<ncols; j++) *cols++ = *aj++; 1212 } 1213 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1214 /* initialization */ 1215 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1216 1217 /* jl: linked list for storing indices of the pivot rows 1218 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1219 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1220 il = jl + am; 1221 uj_ptr = (PetscInt**)(il + am); 1222 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1223 for (i=0; i<am; i++){ 1224 jl[i] = am; il[i] = 0; 1225 } 1226 1227 /* create and initialize a linked list for storing column indices of the active row k */ 1228 nlnk = am + 1; 1229 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1230 1231 /* initial FreeSpace size is fill*(ai[am]+1) */ 1232 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1233 current_space = free_space; 1234 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1235 current_space_lvl = free_space_lvl; 1236 1237 for (k=0; k<am; k++){ /* for each active row k */ 1238 /* initialize lnk by the column indices of row rip[k] of A */ 1239 nzk = 0; 1240 ncols = ai[rip[k]+1] - ai[rip[k]]; 1241 ncols_upper = 0; 1242 for (j=0; j<ncols; j++){ 1243 i = *(aj + ai[rip[k]] + j); 1244 if (rip[i] >= k){ /* only take upper triangular entry */ 1245 ajtmp[ncols_upper] = i; 1246 ncols_upper++; 1247 } 1248 } 1249 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1250 nzk += nlnk; 1251 1252 /* update lnk by computing fill-in for each pivot row to be merged in */ 1253 prow = jl[k]; /* 1st pivot row */ 1254 1255 while (prow < k){ 1256 nextprow = jl[prow]; 1257 1258 /* merge prow into k-th row */ 1259 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1260 jmax = ui[prow+1]; 1261 ncols = jmax-jmin; 1262 i = jmin - ui[prow]; 1263 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1264 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1265 j = *(uj - 1); 1266 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1267 nzk += nlnk; 1268 1269 /* update il and jl for prow */ 1270 if (jmin < jmax){ 1271 il[prow] = jmin; 1272 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1273 } 1274 prow = nextprow; 1275 } 1276 1277 /* if free space is not available, make more free space */ 1278 if (current_space->local_remaining<nzk) { 1279 i = am - k + 1; /* num of unfactored rows */ 1280 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1281 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1282 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1283 reallocs++; 1284 } 1285 1286 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1287 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1288 1289 /* add the k-th row into il and jl */ 1290 if (nzk > 1){ 1291 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1292 jl[k] = jl[i]; jl[i] = k; 1293 il[k] = ui[k] + 1; 1294 } 1295 uj_ptr[k] = current_space->array; 1296 uj_lvl_ptr[k] = current_space_lvl->array; 1297 1298 current_space->array += nzk; 1299 current_space->local_used += nzk; 1300 current_space->local_remaining -= nzk; 1301 1302 current_space_lvl->array += nzk; 1303 current_space_lvl->local_used += nzk; 1304 current_space_lvl->local_remaining -= nzk; 1305 1306 ui[k+1] = ui[k] + nzk; 1307 } 1308 1309 #if defined(PETSC_USE_DEBUG) 1310 if (ai[am] != 0) { 1311 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1312 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1313 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1314 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1315 } else { 1316 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1317 } 1318 #endif 1319 1320 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1321 ierr = PetscFree(jl);CHKERRQ(ierr); 1322 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1323 1324 /* destroy list of free space and other temporary array(s) */ 1325 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1326 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1327 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1328 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1329 1330 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1331 1332 /* put together the new matrix in MATSEQSBAIJ format */ 1333 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1334 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1335 B = *fact; 1336 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1337 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1338 1339 b = (Mat_SeqSBAIJ*)B->data; 1340 b->singlemalloc = PETSC_FALSE; 1341 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1342 b->j = uj; 1343 b->i = ui; 1344 b->diag = 0; 1345 b->ilen = 0; 1346 b->imax = 0; 1347 b->row = perm; 1348 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1349 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1350 b->icol = perm; 1351 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1352 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1353 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1354 b->maxnz = b->nz = ui[am]; 1355 1356 B->factor = FACTOR_CHOLESKY; 1357 B->info.factor_mallocs = reallocs; 1358 B->info.fill_ratio_given = fill; 1359 if (ai[am] != 0) { 1360 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1361 } else { 1362 B->info.fill_ratio_needed = 0.0; 1363 } 1364 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1365 if (perm_identity){ 1366 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1367 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1368 } 1369 PetscFunctionReturn(0); 1370 } 1371 1372 #undef __FUNCT__ 1373 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1374 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1375 { 1376 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1377 Mat_SeqSBAIJ *b; 1378 Mat B; 1379 PetscErrorCode ierr; 1380 PetscTruth perm_identity; 1381 PetscReal fill = info->fill; 1382 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1383 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1384 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1385 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1386 PetscBT lnkbt; 1387 IS iperm; 1388 1389 PetscFunctionBegin; 1390 /* check whether perm is the identity mapping */ 1391 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1392 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1393 1394 if (!perm_identity){ 1395 /* check if perm is symmetric! */ 1396 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1397 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1398 for (i=0; i<am; i++) { 1399 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1400 } 1401 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1402 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1403 } 1404 1405 /* initialization */ 1406 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1407 ui[0] = 0; 1408 1409 /* jl: linked list for storing indices of the pivot rows 1410 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1411 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1412 il = jl + am; 1413 cols = il + am; 1414 ui_ptr = (PetscInt**)(cols + am); 1415 for (i=0; i<am; i++){ 1416 jl[i] = am; il[i] = 0; 1417 } 1418 1419 /* create and initialize a linked list for storing column indices of the active row k */ 1420 nlnk = am + 1; 1421 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1422 1423 /* initial FreeSpace size is fill*(ai[am]+1) */ 1424 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1425 current_space = free_space; 1426 1427 for (k=0; k<am; k++){ /* for each active row k */ 1428 /* initialize lnk by the column indices of row rip[k] of A */ 1429 nzk = 0; 1430 ncols = ai[rip[k]+1] - ai[rip[k]]; 1431 ncols_upper = 0; 1432 for (j=0; j<ncols; j++){ 1433 i = rip[*(aj + ai[rip[k]] + j)]; 1434 if (i >= k){ /* only take upper triangular entry */ 1435 cols[ncols_upper] = i; 1436 ncols_upper++; 1437 } 1438 } 1439 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1440 nzk += nlnk; 1441 1442 /* update lnk by computing fill-in for each pivot row to be merged in */ 1443 prow = jl[k]; /* 1st pivot row */ 1444 1445 while (prow < k){ 1446 nextprow = jl[prow]; 1447 /* merge prow into k-th row */ 1448 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1449 jmax = ui[prow+1]; 1450 ncols = jmax-jmin; 1451 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1452 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1453 nzk += nlnk; 1454 1455 /* update il and jl for prow */ 1456 if (jmin < jmax){ 1457 il[prow] = jmin; 1458 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1459 } 1460 prow = nextprow; 1461 } 1462 1463 /* if free space is not available, make more free space */ 1464 if (current_space->local_remaining<nzk) { 1465 i = am - k + 1; /* num of unfactored rows */ 1466 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1467 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1468 reallocs++; 1469 } 1470 1471 /* copy data into free space, then initialize lnk */ 1472 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1473 1474 /* add the k-th row into il and jl */ 1475 if (nzk-1 > 0){ 1476 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1477 jl[k] = jl[i]; jl[i] = k; 1478 il[k] = ui[k] + 1; 1479 } 1480 ui_ptr[k] = current_space->array; 1481 current_space->array += nzk; 1482 current_space->local_used += nzk; 1483 current_space->local_remaining -= nzk; 1484 1485 ui[k+1] = ui[k] + nzk; 1486 } 1487 1488 #if defined(PETSC_USE_DEBUG) 1489 if (ai[am] != 0) { 1490 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1491 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1492 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1493 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1494 } else { 1495 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1496 } 1497 #endif 1498 1499 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1500 ierr = PetscFree(jl);CHKERRQ(ierr); 1501 1502 /* destroy list of free space and other temporary array(s) */ 1503 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1504 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1505 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1506 1507 /* put together the new matrix in MATSEQSBAIJ format */ 1508 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1509 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1510 B = *fact; 1511 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1512 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1513 1514 b = (Mat_SeqSBAIJ*)B->data; 1515 b->singlemalloc = PETSC_FALSE; 1516 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1517 b->j = uj; 1518 b->i = ui; 1519 b->diag = 0; 1520 b->ilen = 0; 1521 b->imax = 0; 1522 b->row = perm; 1523 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1524 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1525 b->icol = perm; 1526 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1527 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1528 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1529 b->maxnz = b->nz = ui[am]; 1530 1531 B->factor = FACTOR_CHOLESKY; 1532 B->info.factor_mallocs = reallocs; 1533 B->info.fill_ratio_given = fill; 1534 if (ai[am] != 0) { 1535 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1536 } else { 1537 B->info.fill_ratio_needed = 0.0; 1538 } 1539 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1540 if (perm_identity){ 1541 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1542 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1543 } 1544 PetscFunctionReturn(0); 1545 } 1546