1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 23 24 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 25 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 26 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->m; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 223 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 225 (*fact)->factor = FACTOR_LU; 226 (*fact)->assembled = PETSC_TRUE; 227 228 b = (Mat_SeqAIJ*)(*fact)->data; 229 ierr = PetscFree(b->imax);CHKERRQ(ierr); 230 b->sorted = PETSC_FALSE; 231 b->singlemalloc = PETSC_FALSE; 232 /* the next line frees the default space generated by the MatCreate() */ 233 ierr = PetscFree(b->a);CHKERRQ(ierr); 234 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 235 b->a = new_a; 236 b->j = new_j; 237 b->i = new_i; 238 b->ilen = 0; 239 b->imax = 0; 240 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 241 b->row = isirow; 242 b->col = iscolf; 243 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 244 b->maxnz = b->nz = new_i[n]; 245 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 246 (*fact)->info.factor_mallocs = 0; 247 248 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 249 250 af = ((double)b->nz)/((double)a->nz) + .001; 251 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));CHKERRQ(ierr); 252 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 253 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 254 ierr = PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 255 256 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 257 258 PetscFunctionReturn(0); 259 #endif 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 265 { 266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 267 IS isicol; 268 PetscErrorCode ierr; 269 PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j; 270 PetscInt *bi,*bj,*ajtmp; 271 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 272 PetscReal f; 273 PetscInt nlnk,*lnk,k,*cols,**bi_ptr; 274 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 275 PetscBT lnkbt; 276 277 PetscFunctionBegin; 278 if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 279 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 280 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 281 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 282 283 /* get new row pointers */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 285 bi[0] = 0; 286 287 /* bdiag is location of diagonal in factor */ 288 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 289 bdiag[0] = 0; 290 291 /* linked list for storing column indices of the active row */ 292 nlnk = n + 1; 293 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 294 295 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);CHKERRQ(ierr); 296 im = cols + n; 297 bi_ptr = (PetscInt**)(im + n); 298 299 /* initial FreeSpace size is f*(ai[n]+1) */ 300 f = info->fill; 301 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 302 current_space = free_space; 303 304 for (i=0; i<n; i++) { 305 /* copy previous fill into linked list */ 306 nzi = 0; 307 nnz = ai[r[i]+1] - ai[r[i]]; 308 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 309 ajtmp = aj + ai[r[i]]; 310 for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */ 311 ierr = PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 312 nzi += nlnk; 313 314 /* add pivot rows into linked list */ 315 row = lnk[n]; 316 while (row < i) { 317 nzbd = bdiag[row] - bi[row] + 1; 318 ajtmp = bi_ptr[row] + nzbd; 319 nnz = im[row] - nzbd; /* num of columns with row<indices<=i */ 320 im[row] = nzbd; 321 ierr = PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);CHKERRQ(ierr); 322 nzi += nlnk; 323 im[row] += nzbd; /* update im[row]: num of cols with index<=i */ 324 325 row = lnk[row]; 326 } 327 328 bi[i+1] = bi[i] + nzi; 329 im[i] = nzi; 330 331 /* mark bdiag */ 332 nzbd = 0; 333 nnz = nzi; 334 k = lnk[n]; 335 while (nnz-- && k < i){ 336 nzbd++; 337 k = lnk[k]; 338 } 339 bdiag[i] = bi[i] + nzbd; 340 341 /* if free space is not available, make more free space */ 342 if (current_space->local_remaining<nzi) { 343 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 344 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 345 reallocs++; 346 } 347 348 /* copy data into free space, then initialize lnk */ 349 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 350 bi_ptr[i] = current_space->array; 351 current_space->array += nzi; 352 current_space->local_used += nzi; 353 current_space->local_remaining -= nzi; 354 } 355 #if defined(PETSC_USE_DEBUG) 356 if (ai[n] != 0) { 357 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 358 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 359 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af));CHKERRQ(ierr); 360 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));CHKERRQ(ierr); 361 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 362 } else { 363 ierr = PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));CHKERRQ(ierr); 364 } 365 #endif 366 367 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 368 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 369 370 /* destroy list of free space and other temporary array(s) */ 371 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 372 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 373 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 374 ierr = PetscFree(cols);CHKERRQ(ierr); 375 376 /* put together the new matrix */ 377 ierr = MatCreate(A->comm,n,n,n,n,B);CHKERRQ(ierr); 378 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 379 ierr = MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);CHKERRQ(ierr); 380 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 381 b = (Mat_SeqAIJ*)(*B)->data; 382 ierr = PetscFree(b->imax);CHKERRQ(ierr); 383 b->singlemalloc = PETSC_FALSE; 384 /* the next line frees the default space generated by the Create() */ 385 ierr = PetscFree(b->a);CHKERRQ(ierr); 386 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 387 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 388 b->j = bj; 389 b->i = bi; 390 b->diag = bdiag; 391 b->ilen = 0; 392 b->imax = 0; 393 b->row = isrow; 394 b->col = iscol; 395 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 396 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 397 b->icol = isicol; 398 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 399 400 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 401 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 402 b->maxnz = b->nz = bi[n] ; 403 404 (*B)->factor = FACTOR_LU; 405 (*B)->info.factor_mallocs = reallocs; 406 (*B)->info.fill_ratio_given = f; 407 408 if (ai[n] != 0) { 409 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 410 } else { 411 (*B)->info.fill_ratio_needed = 0.0; 412 } 413 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 414 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 415 PetscFunctionReturn(0); 416 } 417 418 /* ----------------------------------------------------------- */ 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 421 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 422 { 423 Mat C=*B; 424 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 425 IS isrow = b->row,isicol = b->icol; 426 PetscErrorCode ierr; 427 PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j; 428 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 429 PetscInt *diag_offset = b->diag,diag,*pj; 430 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 431 PetscScalar d; 432 PetscReal rs; 433 LUShift_Ctx sctx; 434 PetscInt newshift; 435 436 PetscFunctionBegin; 437 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 438 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 439 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 440 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 441 rtmps = rtmp; ics = ic; 442 443 if (!a->diag) { 444 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 445 } 446 /* if both shift schemes are chosen by user, only use info->shiftpd */ 447 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 448 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 449 PetscInt *aai = a->i,*ddiag = a->diag; 450 sctx.shift_top = 0; 451 for (i=0; i<n; i++) { 452 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 453 d = (a->a)[ddiag[i]]; 454 rs = -PetscAbsScalar(d) - PetscRealPart(d); 455 v = a->a+aai[i]; 456 nz = aai[i+1] - aai[i]; 457 for (j=0; j<nz; j++) 458 rs += PetscAbsScalar(v[j]); 459 if (rs>sctx.shift_top) sctx.shift_top = rs; 460 } 461 if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12; 462 sctx.shift_top *= 1.1; 463 sctx.nshift_max = 5; 464 sctx.shift_lo = 0.; 465 sctx.shift_hi = 1.; 466 } 467 468 sctx.shift_amount = 0; 469 sctx.nshift = 0; 470 do { 471 sctx.lushift = PETSC_FALSE; 472 for (i=0; i<n; i++){ 473 nz = bi[i+1] - bi[i]; 474 bjtmp = bj + bi[i]; 475 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 476 477 /* load in initial (unfactored row) */ 478 nz = a->i[r[i]+1] - a->i[r[i]]; 479 ajtmp = a->j + a->i[r[i]]; 480 v = a->a + a->i[r[i]]; 481 for (j=0; j<nz; j++) { 482 rtmp[ics[ajtmp[j]]] = v[j]; 483 } 484 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 485 486 row = *bjtmp++; 487 while (row < i) { 488 pc = rtmp + row; 489 if (*pc != 0.0) { 490 pv = b->a + diag_offset[row]; 491 pj = b->j + diag_offset[row] + 1; 492 multiplier = *pc / *pv++; 493 *pc = multiplier; 494 nz = bi[row+1] - diag_offset[row] - 1; 495 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 496 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 497 } 498 row = *bjtmp++; 499 } 500 /* finished row so stick it into b->a */ 501 pv = b->a + bi[i] ; 502 pj = b->j + bi[i] ; 503 nz = bi[i+1] - bi[i]; 504 diag = diag_offset[i] - bi[i]; 505 rs = 0.0; 506 for (j=0; j<nz; j++) { 507 pv[j] = rtmps[pj[j]]; 508 if (j != diag) rs += PetscAbsScalar(pv[j]); 509 } 510 511 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 512 sctx.rs = rs; 513 sctx.pv = pv[diag]; 514 ierr = MatLUCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 515 if (newshift == 1){ 516 break; /* sctx.shift_amount is updated */ 517 } else if (newshift == -1){ 518 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs); 519 } 520 } 521 522 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 523 /* 524 * if no shift in this attempt & shifting & started shifting & can refine, 525 * then try lower shift 526 */ 527 sctx.shift_hi = info->shift_fraction; 528 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 529 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 530 sctx.lushift = PETSC_TRUE; 531 sctx.nshift++; 532 } 533 } while (sctx.lushift); 534 535 /* invert diagonal entries for simplier triangular solves */ 536 for (i=0; i<n; i++) { 537 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 538 } 539 540 ierr = PetscFree(rtmp);CHKERRQ(ierr); 541 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 542 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 543 C->factor = FACTOR_LU; 544 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 545 C->assembled = PETSC_TRUE; 546 ierr = PetscLogFlops(C->n);CHKERRQ(ierr); 547 if (sctx.nshift){ 548 if (info->shiftnz) { 549 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 550 } else if (info->shiftpd) { 551 ierr = PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));CHKERRQ(ierr); 552 } 553 } 554 PetscFunctionReturn(0); 555 } 556 557 #undef __FUNCT__ 558 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 559 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 560 { 561 PetscFunctionBegin; 562 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 563 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 564 PetscFunctionReturn(0); 565 } 566 567 568 /* ----------------------------------------------------------- */ 569 #undef __FUNCT__ 570 #define __FUNCT__ "MatLUFactor_SeqAIJ" 571 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 572 { 573 PetscErrorCode ierr; 574 Mat C; 575 576 PetscFunctionBegin; 577 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 578 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 579 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 580 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 581 PetscFunctionReturn(0); 582 } 583 /* ----------------------------------------------------------- */ 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatSolve_SeqAIJ" 586 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 587 { 588 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 589 IS iscol = a->col,isrow = a->row; 590 PetscErrorCode ierr; 591 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 592 PetscInt nz,*rout,*cout; 593 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 594 595 PetscFunctionBegin; 596 if (!n) PetscFunctionReturn(0); 597 598 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 599 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 600 tmp = a->solve_work; 601 602 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 603 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 604 605 /* forward solve the lower triangular */ 606 tmp[0] = b[*r++]; 607 tmps = tmp; 608 for (i=1; i<n; i++) { 609 v = aa + ai[i] ; 610 vi = aj + ai[i] ; 611 nz = a->diag[i] - ai[i]; 612 sum = b[*r++]; 613 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 614 tmp[i] = sum; 615 } 616 617 /* backward solve the upper triangular */ 618 for (i=n-1; i>=0; i--){ 619 v = aa + a->diag[i] + 1; 620 vi = aj + a->diag[i] + 1; 621 nz = ai[i+1] - a->diag[i] - 1; 622 sum = tmp[i]; 623 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 624 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 625 } 626 627 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 628 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 629 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 630 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 631 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 632 PetscFunctionReturn(0); 633 } 634 635 /* ----------------------------------------------------------- */ 636 #undef __FUNCT__ 637 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 638 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 639 { 640 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 641 PetscErrorCode ierr; 642 PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag; 643 PetscScalar *x,*b,*aa = a->a; 644 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 645 PetscInt adiag_i,i,*vi,nz,ai_i; 646 PetscScalar *v,sum; 647 #endif 648 649 PetscFunctionBegin; 650 if (!n) PetscFunctionReturn(0); 651 652 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 653 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 654 655 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 656 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 657 #else 658 /* forward solve the lower triangular */ 659 x[0] = b[0]; 660 for (i=1; i<n; i++) { 661 ai_i = ai[i]; 662 v = aa + ai_i; 663 vi = aj + ai_i; 664 nz = adiag[i] - ai_i; 665 sum = b[i]; 666 while (nz--) sum -= *v++ * x[*vi++]; 667 x[i] = sum; 668 } 669 670 /* backward solve the upper triangular */ 671 for (i=n-1; i>=0; i--){ 672 adiag_i = adiag[i]; 673 v = aa + adiag_i + 1; 674 vi = aj + adiag_i + 1; 675 nz = ai[i+1] - adiag_i - 1; 676 sum = x[i]; 677 while (nz--) sum -= *v++ * x[*vi++]; 678 x[i] = sum*aa[adiag_i]; 679 } 680 #endif 681 ierr = PetscLogFlops(2*a->nz - A->n);CHKERRQ(ierr); 682 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 683 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 684 PetscFunctionReturn(0); 685 } 686 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 689 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 690 { 691 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 692 IS iscol = a->col,isrow = a->row; 693 PetscErrorCode ierr; 694 PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j; 695 PetscInt nz,*rout,*cout; 696 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 697 698 PetscFunctionBegin; 699 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 700 701 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 702 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 703 tmp = a->solve_work; 704 705 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 706 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 707 708 /* forward solve the lower triangular */ 709 tmp[0] = b[*r++]; 710 for (i=1; i<n; i++) { 711 v = aa + ai[i] ; 712 vi = aj + ai[i] ; 713 nz = a->diag[i] - ai[i]; 714 sum = b[*r++]; 715 while (nz--) sum -= *v++ * tmp[*vi++ ]; 716 tmp[i] = sum; 717 } 718 719 /* backward solve the upper triangular */ 720 for (i=n-1; i>=0; i--){ 721 v = aa + a->diag[i] + 1; 722 vi = aj + a->diag[i] + 1; 723 nz = ai[i+1] - a->diag[i] - 1; 724 sum = tmp[i]; 725 while (nz--) sum -= *v++ * tmp[*vi++ ]; 726 tmp[i] = sum*aa[a->diag[i]]; 727 x[*c--] += tmp[i]; 728 } 729 730 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 731 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 732 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 733 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 734 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 735 736 PetscFunctionReturn(0); 737 } 738 /* -------------------------------------------------------------------*/ 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 741 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 742 { 743 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 744 IS iscol = a->col,isrow = a->row; 745 PetscErrorCode ierr; 746 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 747 PetscInt nz,*rout,*cout,*diag = a->diag; 748 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 749 750 PetscFunctionBegin; 751 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 752 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 753 tmp = a->solve_work; 754 755 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 756 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 757 758 /* copy the b into temp work space according to permutation */ 759 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 760 761 /* forward solve the U^T */ 762 for (i=0; i<n; i++) { 763 v = aa + diag[i] ; 764 vi = aj + diag[i] + 1; 765 nz = ai[i+1] - diag[i] - 1; 766 s1 = tmp[i]; 767 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 768 while (nz--) { 769 tmp[*vi++ ] -= (*v++)*s1; 770 } 771 tmp[i] = s1; 772 } 773 774 /* backward solve the L^T */ 775 for (i=n-1; i>=0; i--){ 776 v = aa + diag[i] - 1 ; 777 vi = aj + diag[i] - 1 ; 778 nz = diag[i] - ai[i]; 779 s1 = tmp[i]; 780 while (nz--) { 781 tmp[*vi-- ] -= (*v--)*s1; 782 } 783 } 784 785 /* copy tmp into x according to permutation */ 786 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 787 788 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 789 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 790 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 791 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 792 793 ierr = PetscLogFlops(2*a->nz-A->n);CHKERRQ(ierr); 794 PetscFunctionReturn(0); 795 } 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 799 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 800 { 801 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 802 IS iscol = a->col,isrow = a->row; 803 PetscErrorCode ierr; 804 PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j; 805 PetscInt nz,*rout,*cout,*diag = a->diag; 806 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 807 808 PetscFunctionBegin; 809 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 810 811 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 812 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 813 tmp = a->solve_work; 814 815 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 816 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 817 818 /* copy the b into temp work space according to permutation */ 819 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 820 821 /* forward solve the U^T */ 822 for (i=0; i<n; i++) { 823 v = aa + diag[i] ; 824 vi = aj + diag[i] + 1; 825 nz = ai[i+1] - diag[i] - 1; 826 tmp[i] *= *v++; 827 while (nz--) { 828 tmp[*vi++ ] -= (*v++)*tmp[i]; 829 } 830 } 831 832 /* backward solve the L^T */ 833 for (i=n-1; i>=0; i--){ 834 v = aa + diag[i] - 1 ; 835 vi = aj + diag[i] - 1 ; 836 nz = diag[i] - ai[i]; 837 while (nz--) { 838 tmp[*vi-- ] -= (*v--)*tmp[i]; 839 } 840 } 841 842 /* copy tmp into x according to permutation */ 843 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 844 845 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 846 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 847 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 848 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 849 850 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 851 PetscFunctionReturn(0); 852 } 853 /* ----------------------------------------------------------------*/ 854 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 855 856 #undef __FUNCT__ 857 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 858 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 859 { 860 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 861 IS isicol; 862 PetscErrorCode ierr; 863 PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j; 864 PetscInt *bi,*cols,nnz,*cols_lvl; 865 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 866 PetscInt i,levels,diagonal_fill; 867 PetscTruth col_identity,row_identity; 868 PetscReal f; 869 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 870 PetscBT lnkbt; 871 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 872 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 873 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 874 875 PetscFunctionBegin; 876 f = info->fill; 877 levels = (PetscInt)info->levels; 878 diagonal_fill = (PetscInt)info->diagonal_fill; 879 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 880 881 /* special case that simply copies fill pattern */ 882 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 883 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 884 if (!levels && row_identity && col_identity) { 885 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 886 (*fact)->factor = FACTOR_LU; 887 b = (Mat_SeqAIJ*)(*fact)->data; 888 if (!b->diag) { 889 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 890 } 891 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 892 b->row = isrow; 893 b->col = iscol; 894 b->icol = isicol; 895 ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 896 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 897 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 898 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 899 PetscFunctionReturn(0); 900 } 901 902 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 903 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 904 905 /* get new row pointers */ 906 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 907 bi[0] = 0; 908 /* bdiag is location of diagonal in factor */ 909 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 910 bdiag[0] = 0; 911 912 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 913 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 914 915 /* create a linked list for storing column indices of the active row */ 916 nlnk = n + 1; 917 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 918 919 /* initial FreeSpace size is f*(ai[n]+1) */ 920 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 921 current_space = free_space; 922 ierr = GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 923 current_space_lvl = free_space_lvl; 924 925 for (i=0; i<n; i++) { 926 nzi = 0; 927 /* copy current row into linked list */ 928 nnz = ai[r[i]+1] - ai[r[i]]; 929 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 930 cols = aj + ai[r[i]]; 931 lnk[i] = -1; /* marker to indicate if diagonal exists */ 932 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 933 nzi += nlnk; 934 935 /* make sure diagonal entry is included */ 936 if (diagonal_fill && lnk[i] == -1) { 937 fm = n; 938 while (lnk[fm] < i) fm = lnk[fm]; 939 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 940 lnk[fm] = i; 941 lnk_lvl[i] = 0; 942 nzi++; dcount++; 943 } 944 945 /* add pivot rows into the active row */ 946 nzbd = 0; 947 prow = lnk[n]; 948 while (prow < i) { 949 nnz = bdiag[prow]; 950 cols = bj_ptr[prow] + nnz + 1; 951 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 952 nnz = bi[prow+1] - bi[prow] - nnz - 1; 953 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 954 nzi += nlnk; 955 prow = lnk[prow]; 956 nzbd++; 957 } 958 bdiag[i] = nzbd; 959 bi[i+1] = bi[i] + nzi; 960 961 /* if free space is not available, make more free space */ 962 if (current_space->local_remaining<nzi) { 963 nnz = nzi*(n - i); /* estimated and max additional space needed */ 964 ierr = GetMoreSpace(nnz,¤t_space);CHKERRQ(ierr); 965 ierr = GetMoreSpace(nnz,¤t_space_lvl);CHKERRQ(ierr); 966 reallocs++; 967 } 968 969 /* copy data into free_space and free_space_lvl, then initialize lnk */ 970 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 971 bj_ptr[i] = current_space->array; 972 bjlvl_ptr[i] = current_space_lvl->array; 973 974 /* make sure the active row i has diagonal entry */ 975 if (*(bj_ptr[i]+bdiag[i]) != i) { 976 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 977 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i); 978 } 979 980 current_space->array += nzi; 981 current_space->local_used += nzi; 982 current_space->local_remaining -= nzi; 983 current_space_lvl->array += nzi; 984 current_space_lvl->local_used += nzi; 985 current_space_lvl->local_remaining -= nzi; 986 } 987 988 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 989 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 990 991 /* destroy list of free space and other temporary arrays */ 992 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 993 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 994 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 995 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 996 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 997 998 #if defined(PETSC_USE_DEBUG) 999 { 1000 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1001 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));CHKERRQ(ierr); 1002 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));CHKERRQ(ierr); 1003 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));CHKERRQ(ierr); 1004 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));CHKERRQ(ierr); 1005 if (diagonal_fill) { 1006 ierr = PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));CHKERRQ(ierr); 1007 } 1008 } 1009 #endif 1010 1011 /* put together the new matrix */ 1012 ierr = MatCreate(A->comm,n,n,n,n,fact);CHKERRQ(ierr); 1013 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1014 ierr = MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);CHKERRQ(ierr); 1015 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1016 b = (Mat_SeqAIJ*)(*fact)->data; 1017 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1018 b->singlemalloc = PETSC_FALSE; 1019 len = (bi[n] )*sizeof(PetscScalar); 1020 /* the next line frees the default space generated by the Create() */ 1021 ierr = PetscFree(b->a);CHKERRQ(ierr); 1022 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1023 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1024 b->j = bj; 1025 b->i = bi; 1026 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1027 b->diag = bdiag; 1028 b->ilen = 0; 1029 b->imax = 0; 1030 b->row = isrow; 1031 b->col = iscol; 1032 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1033 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1034 b->icol = isicol; 1035 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1036 /* In b structure: Free imax, ilen, old a, old j. 1037 Allocate bdiag, solve_work, new a, new j */ 1038 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1039 b->maxnz = b->nz = bi[n] ; 1040 (*fact)->factor = FACTOR_LU; 1041 (*fact)->info.factor_mallocs = reallocs; 1042 (*fact)->info.fill_ratio_given = f; 1043 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1044 1045 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1046 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1047 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #include "src/mat/impls/sbaij/seq/sbaij.h" 1052 #undef __FUNCT__ 1053 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1054 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1055 { 1056 Mat C = *B; 1057 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1058 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1059 IS ip=b->row; 1060 PetscErrorCode ierr; 1061 PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol; 1062 PetscInt *ai=a->i,*aj=a->j; 1063 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1064 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1065 PetscReal zeropivot,rs,shiftnz; 1066 PetscTruth shiftpd; 1067 ChShift_Ctx sctx; 1068 PetscInt newshift; 1069 1070 PetscFunctionBegin; 1071 shiftnz = info->shiftnz; 1072 shiftpd = info->shiftpd; 1073 zeropivot = info->zeropivot; 1074 1075 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1076 1077 /* initialization */ 1078 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1079 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1080 jl = il + mbs; 1081 rtmp = (MatScalar*)(jl + mbs); 1082 1083 sctx.shift_amount = 0; 1084 sctx.nshift = 0; 1085 do { 1086 sctx.chshift = PETSC_FALSE; 1087 for (i=0; i<mbs; i++) { 1088 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1089 } 1090 1091 for (k = 0; k<mbs; k++){ 1092 bval = ba + bi[k]; 1093 /* initialize k-th row by the perm[k]-th row of A */ 1094 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1095 for (j = jmin; j < jmax; j++){ 1096 col = rip[aj[j]]; 1097 if (col >= k){ /* only take upper triangular entry */ 1098 rtmp[col] = aa[j]; 1099 *bval++ = 0.0; /* for in-place factorization */ 1100 } 1101 } 1102 /* shift the diagonal of the matrix */ 1103 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1104 1105 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1106 dk = rtmp[k]; 1107 i = jl[k]; /* first row to be added to k_th row */ 1108 1109 while (i < k){ 1110 nexti = jl[i]; /* next row to be added to k_th row */ 1111 1112 /* compute multiplier, update diag(k) and U(i,k) */ 1113 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1114 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1115 dk += uikdi*ba[ili]; 1116 ba[ili] = uikdi; /* -U(i,k) */ 1117 1118 /* add multiple of row i to k-th row */ 1119 jmin = ili + 1; jmax = bi[i+1]; 1120 if (jmin < jmax){ 1121 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1122 /* update il and jl for row i */ 1123 il[i] = jmin; 1124 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1125 } 1126 i = nexti; 1127 } 1128 1129 /* shift the diagonals when zero pivot is detected */ 1130 /* compute rs=sum of abs(off-diagonal) */ 1131 rs = 0.0; 1132 jmin = bi[k]+1; 1133 nz = bi[k+1] - jmin; 1134 if (nz){ 1135 bcol = bj + jmin; 1136 while (nz--){ 1137 rs += PetscAbsScalar(rtmp[*bcol]); 1138 bcol++; 1139 } 1140 } 1141 1142 sctx.rs = rs; 1143 sctx.pv = dk; 1144 ierr = MatCholeskyCheckShift_inline(info,sctx,newshift);CHKERRQ(ierr); 1145 if (newshift == 1){ 1146 break; /* sctx.shift_amount is updated */ 1147 } else if (newshift == -1){ 1148 SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs); 1149 } 1150 1151 /* copy data into U(k,:) */ 1152 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1153 jmin = bi[k]+1; jmax = bi[k+1]; 1154 if (jmin < jmax) { 1155 for (j=jmin; j<jmax; j++){ 1156 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1157 } 1158 /* add the k-th row into il and jl */ 1159 il[k] = jmin; 1160 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1161 } 1162 } 1163 } while (sctx.chshift); 1164 ierr = PetscFree(il);CHKERRQ(ierr); 1165 1166 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1167 C->factor = FACTOR_CHOLESKY; 1168 C->assembled = PETSC_TRUE; 1169 C->preallocated = PETSC_TRUE; 1170 ierr = PetscLogFlops(C->m);CHKERRQ(ierr); 1171 if (sctx.nshift){ 1172 if (shiftnz) { 1173 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1174 } else if (shiftpd) { 1175 ierr = PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));CHKERRQ(ierr); 1176 } 1177 } 1178 PetscFunctionReturn(0); 1179 } 1180 1181 #undef __FUNCT__ 1182 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1183 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1184 { 1185 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1186 Mat_SeqSBAIJ *b; 1187 Mat B; 1188 PetscErrorCode ierr; 1189 PetscTruth perm_identity; 1190 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui; 1191 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1192 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1193 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1194 PetscReal fill=info->fill,levels=info->levels; 1195 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1196 FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1197 PetscBT lnkbt; 1198 1199 PetscFunctionBegin; 1200 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1201 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1202 1203 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1204 ui[0] = 0; 1205 1206 /* special case that simply copies fill pattern */ 1207 if (!levels && perm_identity) { 1208 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1209 for (i=0; i<am; i++) { 1210 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1211 } 1212 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1213 cols = uj; 1214 for (i=0; i<am; i++) { 1215 aj = a->j + a->diag[i]; 1216 ncols = ui[i+1] - ui[i]; 1217 for (j=0; j<ncols; j++) *cols++ = *aj++; 1218 } 1219 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1220 /* initialization */ 1221 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1222 1223 /* jl: linked list for storing indices of the pivot rows 1224 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1225 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1226 il = jl + am; 1227 uj_ptr = (PetscInt**)(il + am); 1228 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1229 for (i=0; i<am; i++){ 1230 jl[i] = am; il[i] = 0; 1231 } 1232 1233 /* create and initialize a linked list for storing column indices of the active row k */ 1234 nlnk = am + 1; 1235 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1236 1237 /* initial FreeSpace size is fill*(ai[am]+1) */ 1238 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1239 current_space = free_space; 1240 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1241 current_space_lvl = free_space_lvl; 1242 1243 for (k=0; k<am; k++){ /* for each active row k */ 1244 /* initialize lnk by the column indices of row rip[k] of A */ 1245 nzk = 0; 1246 ncols = ai[rip[k]+1] - ai[rip[k]]; 1247 ncols_upper = 0; 1248 for (j=0; j<ncols; j++){ 1249 i = *(aj + ai[rip[k]] + j); 1250 if (rip[i] >= k){ /* only take upper triangular entry */ 1251 ajtmp[ncols_upper] = i; 1252 ncols_upper++; 1253 } 1254 } 1255 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1256 nzk += nlnk; 1257 1258 /* update lnk by computing fill-in for each pivot row to be merged in */ 1259 prow = jl[k]; /* 1st pivot row */ 1260 1261 while (prow < k){ 1262 nextprow = jl[prow]; 1263 1264 /* merge prow into k-th row */ 1265 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1266 jmax = ui[prow+1]; 1267 ncols = jmax-jmin; 1268 i = jmin - ui[prow]; 1269 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1270 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1271 j = *(uj - 1); 1272 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1273 nzk += nlnk; 1274 1275 /* update il and jl for prow */ 1276 if (jmin < jmax){ 1277 il[prow] = jmin; 1278 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1279 } 1280 prow = nextprow; 1281 } 1282 1283 /* if free space is not available, make more free space */ 1284 if (current_space->local_remaining<nzk) { 1285 i = am - k + 1; /* num of unfactored rows */ 1286 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1287 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1288 ierr = GetMoreSpace(i,¤t_space_lvl);CHKERRQ(ierr); 1289 reallocs++; 1290 } 1291 1292 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1293 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1294 1295 /* add the k-th row into il and jl */ 1296 if (nzk > 1){ 1297 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1298 jl[k] = jl[i]; jl[i] = k; 1299 il[k] = ui[k] + 1; 1300 } 1301 uj_ptr[k] = current_space->array; 1302 uj_lvl_ptr[k] = current_space_lvl->array; 1303 1304 current_space->array += nzk; 1305 current_space->local_used += nzk; 1306 current_space->local_remaining -= nzk; 1307 1308 current_space_lvl->array += nzk; 1309 current_space_lvl->local_used += nzk; 1310 current_space_lvl->local_remaining -= nzk; 1311 1312 ui[k+1] = ui[k] + nzk; 1313 } 1314 1315 #if defined(PETSC_USE_DEBUG) 1316 if (ai[am] != 0) { 1317 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1318 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1319 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1320 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1321 } else { 1322 ierr = PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1323 } 1324 #endif 1325 1326 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1327 ierr = PetscFree(jl);CHKERRQ(ierr); 1328 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1329 1330 /* destroy list of free space and other temporary array(s) */ 1331 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1332 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1333 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1334 ierr = DestroySpace(free_space_lvl);CHKERRQ(ierr); 1335 1336 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1337 1338 /* put together the new matrix in MATSEQSBAIJ format */ 1339 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1340 B = *fact; 1341 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1342 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1343 1344 b = (Mat_SeqSBAIJ*)B->data; 1345 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1346 b->singlemalloc = PETSC_FALSE; 1347 /* the next line frees the default space generated by the Create() */ 1348 ierr = PetscFree(b->a);CHKERRQ(ierr); 1349 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1350 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1351 b->j = uj; 1352 b->i = ui; 1353 b->diag = 0; 1354 b->ilen = 0; 1355 b->imax = 0; 1356 b->row = perm; 1357 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1358 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1359 b->icol = perm; 1360 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1361 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1362 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1363 b->maxnz = b->nz = ui[am]; 1364 1365 B->factor = FACTOR_CHOLESKY; 1366 B->info.factor_mallocs = reallocs; 1367 B->info.fill_ratio_given = fill; 1368 if (ai[am] != 0) { 1369 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1370 } else { 1371 B->info.fill_ratio_needed = 0.0; 1372 } 1373 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1374 if (perm_identity){ 1375 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1376 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1377 } 1378 PetscFunctionReturn(0); 1379 } 1380 1381 #undef __FUNCT__ 1382 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1383 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1384 { 1385 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1386 Mat_SeqSBAIJ *b; 1387 Mat B; 1388 PetscErrorCode ierr; 1389 PetscTruth perm_identity; 1390 PetscReal fill = info->fill; 1391 PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow; 1392 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1393 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1394 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1395 PetscBT lnkbt; 1396 IS iperm; 1397 1398 PetscFunctionBegin; 1399 /* check whether perm is the identity mapping */ 1400 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1401 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1402 1403 if (!perm_identity){ 1404 /* check if perm is symmetric! */ 1405 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1406 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1407 for (i=0; i<am; i++) { 1408 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1409 } 1410 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1411 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1412 } 1413 1414 /* initialization */ 1415 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1416 ui[0] = 0; 1417 1418 /* jl: linked list for storing indices of the pivot rows 1419 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1420 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1421 il = jl + am; 1422 cols = il + am; 1423 ui_ptr = (PetscInt**)(cols + am); 1424 for (i=0; i<am; i++){ 1425 jl[i] = am; il[i] = 0; 1426 } 1427 1428 /* create and initialize a linked list for storing column indices of the active row k */ 1429 nlnk = am + 1; 1430 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1431 1432 /* initial FreeSpace size is fill*(ai[am]+1) */ 1433 ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1434 current_space = free_space; 1435 1436 for (k=0; k<am; k++){ /* for each active row k */ 1437 /* initialize lnk by the column indices of row rip[k] of A */ 1438 nzk = 0; 1439 ncols = ai[rip[k]+1] - ai[rip[k]]; 1440 ncols_upper = 0; 1441 for (j=0; j<ncols; j++){ 1442 i = rip[*(aj + ai[rip[k]] + j)]; 1443 if (i >= k){ /* only take upper triangular entry */ 1444 cols[ncols_upper] = i; 1445 ncols_upper++; 1446 } 1447 } 1448 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1449 nzk += nlnk; 1450 1451 /* update lnk by computing fill-in for each pivot row to be merged in */ 1452 prow = jl[k]; /* 1st pivot row */ 1453 1454 while (prow < k){ 1455 nextprow = jl[prow]; 1456 /* merge prow into k-th row */ 1457 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1458 jmax = ui[prow+1]; 1459 ncols = jmax-jmin; 1460 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1461 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1462 nzk += nlnk; 1463 1464 /* update il and jl for prow */ 1465 if (jmin < jmax){ 1466 il[prow] = jmin; 1467 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1468 } 1469 prow = nextprow; 1470 } 1471 1472 /* if free space is not available, make more free space */ 1473 if (current_space->local_remaining<nzk) { 1474 i = am - k + 1; /* num of unfactored rows */ 1475 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1476 ierr = GetMoreSpace(i,¤t_space);CHKERRQ(ierr); 1477 reallocs++; 1478 } 1479 1480 /* copy data into free space, then initialize lnk */ 1481 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1482 1483 /* add the k-th row into il and jl */ 1484 if (nzk-1 > 0){ 1485 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1486 jl[k] = jl[i]; jl[i] = k; 1487 il[k] = ui[k] + 1; 1488 } 1489 ui_ptr[k] = current_space->array; 1490 current_space->array += nzk; 1491 current_space->local_used += nzk; 1492 current_space->local_remaining -= nzk; 1493 1494 ui[k+1] = ui[k] + nzk; 1495 } 1496 1497 #if defined(PETSC_USE_DEBUG) 1498 if (ai[am] != 0) { 1499 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1500 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));CHKERRQ(ierr); 1501 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));CHKERRQ(ierr); 1502 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));CHKERRQ(ierr); 1503 } else { 1504 ierr = PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));CHKERRQ(ierr); 1505 } 1506 #endif 1507 1508 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1509 ierr = PetscFree(jl);CHKERRQ(ierr); 1510 1511 /* destroy list of free space and other temporary array(s) */ 1512 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1513 ierr = MakeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1514 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1515 1516 /* put together the new matrix in MATSEQSBAIJ format */ 1517 ierr = MatCreate(PETSC_COMM_SELF,am,am,am,am,fact);CHKERRQ(ierr); 1518 B = *fact; 1519 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1520 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 1521 1522 b = (Mat_SeqSBAIJ*)B->data; 1523 ierr = PetscFree(b->imax);CHKERRQ(ierr); 1524 b->singlemalloc = PETSC_FALSE; 1525 /* the next line frees the default space generated by the Create() */ 1526 ierr = PetscFree(b->a);CHKERRQ(ierr); 1527 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 1528 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1529 b->j = uj; 1530 b->i = ui; 1531 b->diag = 0; 1532 b->ilen = 0; 1533 b->imax = 0; 1534 b->row = perm; 1535 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1536 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1537 b->icol = perm; 1538 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1539 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1540 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1541 b->maxnz = b->nz = ui[am]; 1542 1543 B->factor = FACTOR_CHOLESKY; 1544 B->info.factor_mallocs = reallocs; 1545 B->info.fill_ratio_given = fill; 1546 if (ai[am] != 0) { 1547 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1548 } else { 1549 B->info.fill_ratio_needed = 0.0; 1550 } 1551 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1552 if (perm_identity){ 1553 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1554 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1555 } 1556 PetscFunctionReturn(0); 1557 } 1558