1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->rmap.n; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->free_a = PETSC_TRUE; 231 b->free_ij = PETSC_TRUE; 232 b->sorted = PETSC_FALSE; 233 b->singlemalloc = PETSC_FALSE; 234 b->a = new_a; 235 b->j = new_j; 236 b->i = new_i; 237 b->ilen = 0; 238 b->imax = 0; 239 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 240 b->row = isirow; 241 b->col = iscolf; 242 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 243 b->maxnz = b->nz = new_i[n]; 244 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 245 (*fact)->info.factor_mallocs = 0; 246 247 af = ((double)b->nz)/((double)a->nz) + .001; 248 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 249 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 250 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 251 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 252 253 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 254 255 PetscFunctionReturn(0); 256 #endif 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 261 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 262 { 263 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 264 IS isicol; 265 PetscErrorCode ierr; 266 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 267 PetscInt *bi,*bj,*ajtmp; 268 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 269 PetscReal f; 270 PetscInt nlnk,*lnk,k,**bi_ptr; 271 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 272 PetscBT lnkbt; 273 274 PetscFunctionBegin; 275 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 276 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 277 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 278 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 279 280 /* get new row pointers */ 281 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 282 bi[0] = 0; 283 284 /* bdiag is location of diagonal in factor */ 285 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 286 bdiag[0] = 0; 287 288 /* linked list for storing column indices of the active row */ 289 nlnk = n + 1; 290 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 291 292 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 293 294 /* initial FreeSpace size is f*(ai[n]+1) */ 295 f = info->fill; 296 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 297 current_space = free_space; 298 299 for (i=0; i<n; i++) { 300 /* copy previous fill into linked list */ 301 nzi = 0; 302 nnz = ai[r[i]+1] - ai[r[i]]; 303 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 304 ajtmp = aj + ai[r[i]]; 305 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 306 nzi += nlnk; 307 308 /* add pivot rows into linked list */ 309 row = lnk[n]; 310 while (row < i) { 311 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 312 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 313 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 314 nzi += nlnk; 315 row = lnk[row]; 316 } 317 bi[i+1] = bi[i] + nzi; 318 im[i] = nzi; 319 320 /* mark bdiag */ 321 nzbd = 0; 322 nnz = nzi; 323 k = lnk[n]; 324 while (nnz-- && k < i){ 325 nzbd++; 326 k = lnk[k]; 327 } 328 bdiag[i] = bi[i] + nzbd; 329 330 /* if free space is not available, make more free space */ 331 if (current_space->local_remaining<nzi) { 332 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 333 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 334 reallocs++; 335 } 336 337 /* copy data into free space, then initialize lnk */ 338 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 339 bi_ptr[i] = current_space->array; 340 current_space->array += nzi; 341 current_space->local_used += nzi; 342 current_space->local_remaining -= nzi; 343 } 344 #if defined(PETSC_USE_INFO) 345 if (ai[n] != 0) { 346 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 347 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 348 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 349 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 350 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 351 } else { 352 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 353 } 354 #endif 355 356 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 357 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 358 359 /* destroy list of free space and other temporary array(s) */ 360 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 361 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 362 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 363 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 364 365 /* put together the new matrix */ 366 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 367 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 368 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 369 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 370 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 371 b = (Mat_SeqAIJ*)(*B)->data; 372 b->free_a = PETSC_TRUE; 373 b->free_ij = PETSC_TRUE; 374 b->singlemalloc = PETSC_FALSE; 375 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 376 b->j = bj; 377 b->i = bi; 378 b->diag = bdiag; 379 b->ilen = 0; 380 b->imax = 0; 381 b->row = isrow; 382 b->col = iscol; 383 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 384 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 385 b->icol = isicol; 386 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 387 388 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 389 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 390 b->maxnz = b->nz = bi[n] ; 391 392 (*B)->factor = FACTOR_LU; 393 (*B)->info.factor_mallocs = reallocs; 394 (*B)->info.fill_ratio_given = f; 395 396 if (ai[n] != 0) { 397 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 398 } else { 399 (*B)->info.fill_ratio_needed = 0.0; 400 } 401 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 402 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 403 PetscFunctionReturn(0); 404 } 405 406 /* 407 Trouble in factorization, should we dump the original matrix? 408 */ 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatFactorDumpMatrix" 411 PetscErrorCode MatFactorDumpMatrix(Mat A) 412 { 413 PetscErrorCode ierr; 414 PetscTruth flg; 415 416 PetscFunctionBegin; 417 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 418 if (flg) { 419 PetscViewer viewer; 420 char filename[PETSC_MAX_PATH_LEN]; 421 422 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 423 ierr = PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 424 ierr = MatView(A,viewer);CHKERRQ(ierr); 425 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 426 } 427 PetscFunctionReturn(0); 428 } 429 430 /* ----------------------------------------------------------- */ 431 #undef __FUNCT__ 432 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 433 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 434 { 435 Mat C=*B; 436 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 437 IS isrow = b->row,isicol = b->icol; 438 PetscErrorCode ierr; 439 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 440 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 441 PetscInt *diag_offset = b->diag,diag,*pj; 442 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 443 PetscScalar d; 444 PetscReal rs; 445 LUShift_Ctx sctx; 446 PetscInt newshift,*ddiag; 447 448 PetscFunctionBegin; 449 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 450 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 451 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 452 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 453 rtmps = rtmp; ics = ic; 454 455 sctx.shift_top = 0; 456 sctx.nshift_max = 0; 457 sctx.shift_lo = 0; 458 sctx.shift_hi = 0; 459 460 /* if both shift schemes are chosen by user, only use info->shiftpd */ 461 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 462 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 463 PetscInt *aai = a->i; 464 ddiag = a->diag; 465 sctx.shift_top = 0; 466 for (i=0; i<n; i++) { 467 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 468 d = (a->a)[ddiag[i]]; 469 rs = -PetscAbsScalar(d) - PetscRealPart(d); 470 v = a->a+aai[i]; 471 nz = aai[i+1] - aai[i]; 472 for (j=0; j<nz; j++) 473 rs += PetscAbsScalar(v[j]); 474 if (rs>sctx.shift_top) sctx.shift_top = rs; 475 } 476 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 477 sctx.shift_top *= 1.1; 478 sctx.nshift_max = 5; 479 sctx.shift_lo = 0.; 480 sctx.shift_hi = 1.; 481 } 482 483 sctx.shift_amount = 0; 484 sctx.nshift = 0; 485 do { 486 sctx.lushift = PETSC_FALSE; 487 for (i=0; i<n; i++){ 488 nz = bi[i+1] - bi[i]; 489 bjtmp = bj + bi[i]; 490 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 491 492 /* load in initial (unfactored row) */ 493 nz = a->i[r[i]+1] - a->i[r[i]]; 494 ajtmp = a->j + a->i[r[i]]; 495 v = a->a + a->i[r[i]]; 496 for (j=0; j<nz; j++) { 497 rtmp[ics[ajtmp[j]]] = v[j]; 498 } 499 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 500 501 row = *bjtmp++; 502 while (row < i) { 503 pc = rtmp + row; 504 if (*pc != 0.0) { 505 pv = b->a + diag_offset[row]; 506 pj = b->j + diag_offset[row] + 1; 507 multiplier = *pc / *pv++; 508 *pc = multiplier; 509 nz = bi[row+1] - diag_offset[row] - 1; 510 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 511 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 512 } 513 row = *bjtmp++; 514 } 515 /* finished row so stick it into b->a */ 516 pv = b->a + bi[i] ; 517 pj = b->j + bi[i] ; 518 nz = bi[i+1] - bi[i]; 519 diag = diag_offset[i] - bi[i]; 520 rs = 0.0; 521 for (j=0; j<nz; j++) { 522 pv[j] = rtmps[pj[j]]; 523 if (j != diag) rs += PetscAbsScalar(pv[j]); 524 } 525 526 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 527 sctx.rs = rs; 528 sctx.pv = pv[diag]; 529 ierr = MatLUCheckShift_inline(info,sctx,i,a->diag,newshift);CHKERRQ(ierr); 530 if (newshift == 1) break; 531 } 532 533 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 534 /* 535 * if no shift in this attempt & shifting & started shifting & can refine, 536 * then try lower shift 537 */ 538 sctx.shift_hi = info->shift_fraction; 539 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 540 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 541 sctx.lushift = PETSC_TRUE; 542 sctx.nshift++; 543 } 544 } while (sctx.lushift); 545 546 /* invert diagonal entries for simplier triangular solves */ 547 for (i=0; i<n; i++) { 548 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 549 } 550 551 ierr = PetscFree(rtmp);CHKERRQ(ierr); 552 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 553 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 554 C->factor = FACTOR_LU; 555 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 556 C->assembled = PETSC_TRUE; 557 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 558 if (sctx.nshift){ 559 if (info->shiftnz) { 560 ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 561 } else if (info->shiftpd) { 562 ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 563 } 564 } 565 PetscFunctionReturn(0); 566 } 567 568 #undef __FUNCT__ 569 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 570 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 571 { 572 PetscFunctionBegin; 573 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 574 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 575 PetscFunctionReturn(0); 576 } 577 578 579 /* ----------------------------------------------------------- */ 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatLUFactor_SeqAIJ" 582 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 583 { 584 PetscErrorCode ierr; 585 Mat C; 586 587 PetscFunctionBegin; 588 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 589 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 590 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 591 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 592 PetscFunctionReturn(0); 593 } 594 /* ----------------------------------------------------------- */ 595 #undef __FUNCT__ 596 #define __FUNCT__ "MatSolve_SeqAIJ" 597 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 598 { 599 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 600 IS iscol = a->col,isrow = a->row; 601 PetscErrorCode ierr; 602 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 603 PetscInt nz,*rout,*cout; 604 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 605 606 PetscFunctionBegin; 607 if (!n) PetscFunctionReturn(0); 608 609 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 610 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 611 tmp = a->solve_work; 612 613 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 614 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 615 616 /* forward solve the lower triangular */ 617 tmp[0] = b[*r++]; 618 tmps = tmp; 619 for (i=1; i<n; i++) { 620 v = aa + ai[i] ; 621 vi = aj + ai[i] ; 622 nz = a->diag[i] - ai[i]; 623 sum = b[*r++]; 624 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 625 tmp[i] = sum; 626 } 627 628 /* backward solve the upper triangular */ 629 for (i=n-1; i>=0; i--){ 630 v = aa + a->diag[i] + 1; 631 vi = aj + a->diag[i] + 1; 632 nz = ai[i+1] - a->diag[i] - 1; 633 sum = tmp[i]; 634 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 635 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 636 } 637 638 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 639 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 640 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 641 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 642 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 643 PetscFunctionReturn(0); 644 } 645 646 #undef __FUNCT__ 647 #define __FUNCT__ "MatMatSolve_SeqAIJ" 648 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 649 { 650 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 651 IS iscol = a->col,isrow = a->row; 652 PetscErrorCode ierr; 653 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 654 PetscInt nz,*rout,*cout,neq; 655 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 656 657 PetscFunctionBegin; 658 if (!n) PetscFunctionReturn(0); 659 660 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 661 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 662 663 tmp = a->solve_work; 664 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 665 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 666 667 for (neq=0; neq<n; neq++){ 668 /* forward solve the lower triangular */ 669 tmp[0] = b[r[0]]; 670 tmps = tmp; 671 for (i=1; i<n; i++) { 672 v = aa + ai[i] ; 673 vi = aj + ai[i] ; 674 nz = a->diag[i] - ai[i]; 675 sum = b[r[i]]; 676 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 677 tmp[i] = sum; 678 } 679 /* backward solve the upper triangular */ 680 for (i=n-1; i>=0; i--){ 681 v = aa + a->diag[i] + 1; 682 vi = aj + a->diag[i] + 1; 683 nz = ai[i+1] - a->diag[i] - 1; 684 sum = tmp[i]; 685 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 686 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 687 } 688 689 b += n; 690 x += n; 691 } 692 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 693 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 694 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 695 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 696 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 697 PetscFunctionReturn(0); 698 } 699 700 /* ----------------------------------------------------------- */ 701 #undef __FUNCT__ 702 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 703 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 704 { 705 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 706 PetscErrorCode ierr; 707 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 708 PetscScalar *x,*b,*aa = a->a; 709 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 710 PetscInt adiag_i,i,*vi,nz,ai_i; 711 PetscScalar *v,sum; 712 #endif 713 714 PetscFunctionBegin; 715 if (!n) PetscFunctionReturn(0); 716 717 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 718 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 719 720 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 721 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 722 #else 723 /* forward solve the lower triangular */ 724 x[0] = b[0]; 725 for (i=1; i<n; i++) { 726 ai_i = ai[i]; 727 v = aa + ai_i; 728 vi = aj + ai_i; 729 nz = adiag[i] - ai_i; 730 sum = b[i]; 731 while (nz--) sum -= *v++ * x[*vi++]; 732 x[i] = sum; 733 } 734 735 /* backward solve the upper triangular */ 736 for (i=n-1; i>=0; i--){ 737 adiag_i = adiag[i]; 738 v = aa + adiag_i + 1; 739 vi = aj + adiag_i + 1; 740 nz = ai[i+1] - adiag_i - 1; 741 sum = x[i]; 742 while (nz--) sum -= *v++ * x[*vi++]; 743 x[i] = sum*aa[adiag_i]; 744 } 745 #endif 746 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 747 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 748 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 749 PetscFunctionReturn(0); 750 } 751 752 #undef __FUNCT__ 753 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 754 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 755 { 756 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 757 IS iscol = a->col,isrow = a->row; 758 PetscErrorCode ierr; 759 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 760 PetscInt nz,*rout,*cout; 761 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 762 763 PetscFunctionBegin; 764 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 765 766 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 767 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 768 tmp = a->solve_work; 769 770 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 771 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 772 773 /* forward solve the lower triangular */ 774 tmp[0] = b[*r++]; 775 for (i=1; i<n; i++) { 776 v = aa + ai[i] ; 777 vi = aj + ai[i] ; 778 nz = a->diag[i] - ai[i]; 779 sum = b[*r++]; 780 while (nz--) sum -= *v++ * tmp[*vi++ ]; 781 tmp[i] = sum; 782 } 783 784 /* backward solve the upper triangular */ 785 for (i=n-1; i>=0; i--){ 786 v = aa + a->diag[i] + 1; 787 vi = aj + a->diag[i] + 1; 788 nz = ai[i+1] - a->diag[i] - 1; 789 sum = tmp[i]; 790 while (nz--) sum -= *v++ * tmp[*vi++ ]; 791 tmp[i] = sum*aa[a->diag[i]]; 792 x[*c--] += tmp[i]; 793 } 794 795 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 796 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 797 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 798 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 799 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 800 801 PetscFunctionReturn(0); 802 } 803 /* -------------------------------------------------------------------*/ 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 806 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 807 { 808 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 809 IS iscol = a->col,isrow = a->row; 810 PetscErrorCode ierr; 811 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 812 PetscInt nz,*rout,*cout,*diag = a->diag; 813 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 814 815 PetscFunctionBegin; 816 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 817 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 818 tmp = a->solve_work; 819 820 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 821 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 822 823 /* copy the b into temp work space according to permutation */ 824 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 825 826 /* forward solve the U^T */ 827 for (i=0; i<n; i++) { 828 v = aa + diag[i] ; 829 vi = aj + diag[i] + 1; 830 nz = ai[i+1] - diag[i] - 1; 831 s1 = tmp[i]; 832 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 833 while (nz--) { 834 tmp[*vi++ ] -= (*v++)*s1; 835 } 836 tmp[i] = s1; 837 } 838 839 /* backward solve the L^T */ 840 for (i=n-1; i>=0; i--){ 841 v = aa + diag[i] - 1 ; 842 vi = aj + diag[i] - 1 ; 843 nz = diag[i] - ai[i]; 844 s1 = tmp[i]; 845 while (nz--) { 846 tmp[*vi-- ] -= (*v--)*s1; 847 } 848 } 849 850 /* copy tmp into x according to permutation */ 851 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 852 853 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 854 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 855 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 856 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 857 858 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 #undef __FUNCT__ 863 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 864 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 865 { 866 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 867 IS iscol = a->col,isrow = a->row; 868 PetscErrorCode ierr; 869 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 870 PetscInt nz,*rout,*cout,*diag = a->diag; 871 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 872 873 PetscFunctionBegin; 874 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 875 876 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 877 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 878 tmp = a->solve_work; 879 880 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 881 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 882 883 /* copy the b into temp work space according to permutation */ 884 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 885 886 /* forward solve the U^T */ 887 for (i=0; i<n; i++) { 888 v = aa + diag[i] ; 889 vi = aj + diag[i] + 1; 890 nz = ai[i+1] - diag[i] - 1; 891 tmp[i] *= *v++; 892 while (nz--) { 893 tmp[*vi++ ] -= (*v++)*tmp[i]; 894 } 895 } 896 897 /* backward solve the L^T */ 898 for (i=n-1; i>=0; i--){ 899 v = aa + diag[i] - 1 ; 900 vi = aj + diag[i] - 1 ; 901 nz = diag[i] - ai[i]; 902 while (nz--) { 903 tmp[*vi-- ] -= (*v--)*tmp[i]; 904 } 905 } 906 907 /* copy tmp into x according to permutation */ 908 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 909 910 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 911 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 912 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 913 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 914 915 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 916 PetscFunctionReturn(0); 917 } 918 /* ----------------------------------------------------------------*/ 919 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat,PetscTruth*,PetscInt*); 920 921 #undef __FUNCT__ 922 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 923 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 924 { 925 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 926 IS isicol; 927 PetscErrorCode ierr; 928 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d; 929 PetscInt *bi,*cols,nnz,*cols_lvl; 930 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 931 PetscInt i,levels,diagonal_fill; 932 PetscTruth col_identity,row_identity; 933 PetscReal f; 934 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 935 PetscBT lnkbt; 936 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 937 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 938 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 939 PetscTruth missing; 940 941 PetscFunctionBegin; 942 f = info->fill; 943 levels = (PetscInt)info->levels; 944 diagonal_fill = (PetscInt)info->diagonal_fill; 945 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 946 947 /* special case that simply copies fill pattern */ 948 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 949 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 950 if (!levels && row_identity && col_identity) { 951 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 952 (*fact)->factor = FACTOR_LU; 953 (*fact)->info.factor_mallocs = 0; 954 (*fact)->info.fill_ratio_given = info->fill; 955 (*fact)->info.fill_ratio_needed = 1.0; 956 b = (Mat_SeqAIJ*)(*fact)->data; 957 ierr = MatMissingDiagonal_SeqAIJ(*fact,&missing,&d);CHKERRQ(ierr); 958 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 959 b->row = isrow; 960 b->col = iscol; 961 b->icol = isicol; 962 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 963 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 964 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 965 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 970 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 971 972 /* get new row pointers */ 973 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 974 bi[0] = 0; 975 /* bdiag is location of diagonal in factor */ 976 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 977 bdiag[0] = 0; 978 979 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 980 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 981 982 /* create a linked list for storing column indices of the active row */ 983 nlnk = n + 1; 984 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 985 986 /* initial FreeSpace size is f*(ai[n]+1) */ 987 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 988 current_space = free_space; 989 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 990 current_space_lvl = free_space_lvl; 991 992 for (i=0; i<n; i++) { 993 nzi = 0; 994 /* copy current row into linked list */ 995 nnz = ai[r[i]+1] - ai[r[i]]; 996 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 997 cols = aj + ai[r[i]]; 998 lnk[i] = -1; /* marker to indicate if diagonal exists */ 999 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1000 nzi += nlnk; 1001 1002 /* make sure diagonal entry is included */ 1003 if (diagonal_fill && lnk[i] == -1) { 1004 fm = n; 1005 while (lnk[fm] < i) fm = lnk[fm]; 1006 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1007 lnk[fm] = i; 1008 lnk_lvl[i] = 0; 1009 nzi++; dcount++; 1010 } 1011 1012 /* add pivot rows into the active row */ 1013 nzbd = 0; 1014 prow = lnk[n]; 1015 while (prow < i) { 1016 nnz = bdiag[prow]; 1017 cols = bj_ptr[prow] + nnz + 1; 1018 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1019 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1020 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1021 nzi += nlnk; 1022 prow = lnk[prow]; 1023 nzbd++; 1024 } 1025 bdiag[i] = nzbd; 1026 bi[i+1] = bi[i] + nzi; 1027 1028 /* if free space is not available, make more free space */ 1029 if (current_space->local_remaining<nzi) { 1030 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1031 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1032 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1033 reallocs++; 1034 } 1035 1036 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1037 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1038 bj_ptr[i] = current_space->array; 1039 bjlvl_ptr[i] = current_space_lvl->array; 1040 1041 /* make sure the active row i has diagonal entry */ 1042 if (*(bj_ptr[i]+bdiag[i]) != i) { 1043 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1044 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1045 } 1046 1047 current_space->array += nzi; 1048 current_space->local_used += nzi; 1049 current_space->local_remaining -= nzi; 1050 current_space_lvl->array += nzi; 1051 current_space_lvl->local_used += nzi; 1052 current_space_lvl->local_remaining -= nzi; 1053 } 1054 1055 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1056 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1057 1058 /* destroy list of free space and other temporary arrays */ 1059 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1060 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1061 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1062 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1063 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1064 1065 #if defined(PETSC_USE_INFO) 1066 { 1067 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1068 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1069 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1070 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1071 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1072 if (diagonal_fill) { 1073 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1074 } 1075 } 1076 #endif 1077 1078 /* put together the new matrix */ 1079 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1080 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1081 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1082 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1083 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1084 b = (Mat_SeqAIJ*)(*fact)->data; 1085 b->free_a = PETSC_TRUE; 1086 b->free_ij = PETSC_TRUE; 1087 b->singlemalloc = PETSC_FALSE; 1088 len = (bi[n] )*sizeof(PetscScalar); 1089 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1090 b->j = bj; 1091 b->i = bi; 1092 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1093 b->diag = bdiag; 1094 b->ilen = 0; 1095 b->imax = 0; 1096 b->row = isrow; 1097 b->col = iscol; 1098 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1099 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1100 b->icol = isicol; 1101 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1102 /* In b structure: Free imax, ilen, old a, old j. 1103 Allocate bdiag, solve_work, new a, new j */ 1104 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1105 b->maxnz = b->nz = bi[n] ; 1106 (*fact)->factor = FACTOR_LU; 1107 (*fact)->info.factor_mallocs = reallocs; 1108 (*fact)->info.fill_ratio_given = f; 1109 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1110 1111 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1112 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1113 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #include "src/mat/impls/sbaij/seq/sbaij.h" 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1120 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1121 { 1122 Mat C = *B; 1123 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1124 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1125 IS ip=b->row,iip = b->icol; 1126 PetscErrorCode ierr; 1127 PetscInt *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1128 PetscInt *ai=a->i,*aj=a->j; 1129 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1130 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1131 PetscReal zeropivot,rs,shiftnz; 1132 PetscReal shiftpd; 1133 ChShift_Ctx sctx; 1134 PetscInt newshift; 1135 1136 PetscFunctionBegin; 1137 shiftnz = info->shiftnz; 1138 shiftpd = info->shiftpd; 1139 zeropivot = info->zeropivot; 1140 1141 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1142 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1143 1144 /* initialization */ 1145 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1146 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1147 jl = il + mbs; 1148 rtmp = (MatScalar*)(jl + mbs); 1149 1150 sctx.shift_amount = 0; 1151 sctx.nshift = 0; 1152 do { 1153 sctx.chshift = PETSC_FALSE; 1154 for (i=0; i<mbs; i++) { 1155 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1156 } 1157 1158 for (k = 0; k<mbs; k++){ 1159 bval = ba + bi[k]; 1160 /* initialize k-th row by the perm[k]-th row of A */ 1161 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1162 for (j = jmin; j < jmax; j++){ 1163 col = riip[aj[j]]; 1164 if (col >= k){ /* only take upper triangular entry */ 1165 rtmp[col] = aa[j]; 1166 *bval++ = 0.0; /* for in-place factorization */ 1167 } 1168 } 1169 /* shift the diagonal of the matrix */ 1170 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1171 1172 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1173 dk = rtmp[k]; 1174 i = jl[k]; /* first row to be added to k_th row */ 1175 1176 while (i < k){ 1177 nexti = jl[i]; /* next row to be added to k_th row */ 1178 1179 /* compute multiplier, update diag(k) and U(i,k) */ 1180 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1181 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1182 dk += uikdi*ba[ili]; 1183 ba[ili] = uikdi; /* -U(i,k) */ 1184 1185 /* add multiple of row i to k-th row */ 1186 jmin = ili + 1; jmax = bi[i+1]; 1187 if (jmin < jmax){ 1188 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1189 /* update il and jl for row i */ 1190 il[i] = jmin; 1191 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1192 } 1193 i = nexti; 1194 } 1195 1196 /* shift the diagonals when zero pivot is detected */ 1197 /* compute rs=sum of abs(off-diagonal) */ 1198 rs = 0.0; 1199 jmin = bi[k]+1; 1200 nz = bi[k+1] - jmin; 1201 if (nz){ 1202 bcol = bj + jmin; 1203 while (nz--){ 1204 rs += PetscAbsScalar(rtmp[*bcol]); 1205 bcol++; 1206 } 1207 } 1208 1209 sctx.rs = rs; 1210 sctx.pv = dk; 1211 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1212 if (newshift == 1) break; 1213 1214 /* copy data into U(k,:) */ 1215 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1216 jmin = bi[k]+1; jmax = bi[k+1]; 1217 if (jmin < jmax) { 1218 for (j=jmin; j<jmax; j++){ 1219 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1220 } 1221 /* add the k-th row into il and jl */ 1222 il[k] = jmin; 1223 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1224 } 1225 } 1226 } while (sctx.chshift); 1227 ierr = PetscFree(il);CHKERRQ(ierr); 1228 1229 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1230 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1231 C->factor = FACTOR_CHOLESKY; 1232 C->assembled = PETSC_TRUE; 1233 C->preallocated = PETSC_TRUE; 1234 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1235 if (sctx.nshift){ 1236 if (shiftnz) { 1237 ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1238 } else if (shiftpd) { 1239 ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1240 } 1241 } 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1247 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1248 { 1249 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1250 Mat_SeqSBAIJ *b; 1251 Mat B; 1252 PetscErrorCode ierr; 1253 PetscTruth perm_identity; 1254 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1255 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1256 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1257 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1258 PetscReal fill=info->fill,levels=info->levels; 1259 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1260 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1261 PetscBT lnkbt; 1262 1263 PetscFunctionBegin; 1264 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1265 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1266 1267 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1268 ui[0] = 0; 1269 1270 /* special case that simply copies fill pattern */ 1271 if (!levels && perm_identity) { 1272 for (i=0; i<am; i++) { 1273 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1274 } 1275 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1276 cols = uj; 1277 for (i=0; i<am; i++) { 1278 aj = a->j + a->diag[i]; 1279 ncols = ui[i+1] - ui[i]; 1280 for (j=0; j<ncols; j++) *cols++ = *aj++; 1281 } 1282 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1283 /* initialization */ 1284 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1285 1286 /* jl: linked list for storing indices of the pivot rows 1287 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1288 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1289 il = jl + am; 1290 uj_ptr = (PetscInt**)(il + am); 1291 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1292 for (i=0; i<am; i++){ 1293 jl[i] = am; il[i] = 0; 1294 } 1295 1296 /* create and initialize a linked list for storing column indices of the active row k */ 1297 nlnk = am + 1; 1298 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1299 1300 /* initial FreeSpace size is fill*(ai[am]+1) */ 1301 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1302 current_space = free_space; 1303 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1304 current_space_lvl = free_space_lvl; 1305 1306 for (k=0; k<am; k++){ /* for each active row k */ 1307 /* initialize lnk by the column indices of row rip[k] of A */ 1308 nzk = 0; 1309 ncols = ai[rip[k]+1] - ai[rip[k]]; 1310 ncols_upper = 0; 1311 for (j=0; j<ncols; j++){ 1312 i = *(aj + ai[rip[k]] + j); 1313 if (rip[i] >= k){ /* only take upper triangular entry */ 1314 ajtmp[ncols_upper] = i; 1315 ncols_upper++; 1316 } 1317 } 1318 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1319 nzk += nlnk; 1320 1321 /* update lnk by computing fill-in for each pivot row to be merged in */ 1322 prow = jl[k]; /* 1st pivot row */ 1323 1324 while (prow < k){ 1325 nextprow = jl[prow]; 1326 1327 /* merge prow into k-th row */ 1328 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1329 jmax = ui[prow+1]; 1330 ncols = jmax-jmin; 1331 i = jmin - ui[prow]; 1332 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1333 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1334 j = *(uj - 1); 1335 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1336 nzk += nlnk; 1337 1338 /* update il and jl for prow */ 1339 if (jmin < jmax){ 1340 il[prow] = jmin; 1341 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1342 } 1343 prow = nextprow; 1344 } 1345 1346 /* if free space is not available, make more free space */ 1347 if (current_space->local_remaining<nzk) { 1348 i = am - k + 1; /* num of unfactored rows */ 1349 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1350 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1351 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1352 reallocs++; 1353 } 1354 1355 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1356 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1357 1358 /* add the k-th row into il and jl */ 1359 if (nzk > 1){ 1360 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1361 jl[k] = jl[i]; jl[i] = k; 1362 il[k] = ui[k] + 1; 1363 } 1364 uj_ptr[k] = current_space->array; 1365 uj_lvl_ptr[k] = current_space_lvl->array; 1366 1367 current_space->array += nzk; 1368 current_space->local_used += nzk; 1369 current_space->local_remaining -= nzk; 1370 1371 current_space_lvl->array += nzk; 1372 current_space_lvl->local_used += nzk; 1373 current_space_lvl->local_remaining -= nzk; 1374 1375 ui[k+1] = ui[k] + nzk; 1376 } 1377 1378 #if defined(PETSC_USE_INFO) 1379 if (ai[am] != 0) { 1380 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1381 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1382 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1383 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1384 } else { 1385 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1386 } 1387 #endif 1388 1389 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1390 ierr = PetscFree(jl);CHKERRQ(ierr); 1391 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1392 1393 /* destroy list of free space and other temporary array(s) */ 1394 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1395 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1396 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1397 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1398 1399 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1400 1401 /* put together the new matrix in MATSEQSBAIJ format */ 1402 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1403 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1404 B = *fact; 1405 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1406 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1407 1408 b = (Mat_SeqSBAIJ*)B->data; 1409 b->singlemalloc = PETSC_FALSE; 1410 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1411 b->j = uj; 1412 b->i = ui; 1413 b->diag = 0; 1414 b->ilen = 0; 1415 b->imax = 0; 1416 b->row = perm; 1417 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1418 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1419 b->icol = perm; 1420 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1421 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1422 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1423 b->maxnz = b->nz = ui[am]; 1424 b->free_a = PETSC_TRUE; 1425 b->free_ij = PETSC_TRUE; 1426 1427 B->factor = FACTOR_CHOLESKY; 1428 B->info.factor_mallocs = reallocs; 1429 B->info.fill_ratio_given = fill; 1430 if (ai[am] != 0) { 1431 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1432 } else { 1433 B->info.fill_ratio_needed = 0.0; 1434 } 1435 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1436 if (perm_identity){ 1437 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1438 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1439 } 1440 PetscFunctionReturn(0); 1441 } 1442 1443 #undef __FUNCT__ 1444 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1445 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1446 { 1447 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1448 Mat_SeqSBAIJ *b; 1449 Mat B; 1450 PetscErrorCode ierr; 1451 PetscTruth perm_identity; 1452 PetscReal fill = info->fill; 1453 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1454 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1455 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1456 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1457 PetscBT lnkbt; 1458 IS iperm; 1459 1460 PetscFunctionBegin; 1461 /* check whether perm is the identity mapping */ 1462 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1463 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1464 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1465 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1466 1467 /* initialization */ 1468 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1469 ui[0] = 0; 1470 1471 /* jl: linked list for storing indices of the pivot rows 1472 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1473 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1474 il = jl + am; 1475 cols = il + am; 1476 ui_ptr = (PetscInt**)(cols + am); 1477 for (i=0; i<am; i++){ 1478 jl[i] = am; il[i] = 0; 1479 } 1480 1481 /* create and initialize a linked list for storing column indices of the active row k */ 1482 nlnk = am + 1; 1483 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1484 1485 /* initial FreeSpace size is fill*(ai[am]+1) */ 1486 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1487 current_space = free_space; 1488 1489 for (k=0; k<am; k++){ /* for each active row k */ 1490 /* initialize lnk by the column indices of row rip[k] of A */ 1491 nzk = 0; 1492 ncols = ai[rip[k]+1] - ai[rip[k]]; 1493 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1494 ncols_upper = 0; 1495 for (j=0; j<ncols; j++){ 1496 i = riip[*(aj + ai[rip[k]] + j)]; 1497 if (i >= k){ /* only take upper triangular entry */ 1498 cols[ncols_upper] = i; 1499 ncols_upper++; 1500 } 1501 } 1502 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1503 nzk += nlnk; 1504 1505 /* update lnk by computing fill-in for each pivot row to be merged in */ 1506 prow = jl[k]; /* 1st pivot row */ 1507 1508 while (prow < k){ 1509 nextprow = jl[prow]; 1510 /* merge prow into k-th row */ 1511 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1512 jmax = ui[prow+1]; 1513 ncols = jmax-jmin; 1514 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1515 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1516 nzk += nlnk; 1517 1518 /* update il and jl for prow */ 1519 if (jmin < jmax){ 1520 il[prow] = jmin; 1521 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1522 } 1523 prow = nextprow; 1524 } 1525 1526 /* if free space is not available, make more free space */ 1527 if (current_space->local_remaining<nzk) { 1528 i = am - k + 1; /* num of unfactored rows */ 1529 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1530 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1531 reallocs++; 1532 } 1533 1534 /* copy data into free space, then initialize lnk */ 1535 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1536 1537 /* add the k-th row into il and jl */ 1538 if (nzk-1 > 0){ 1539 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1540 jl[k] = jl[i]; jl[i] = k; 1541 il[k] = ui[k] + 1; 1542 } 1543 ui_ptr[k] = current_space->array; 1544 current_space->array += nzk; 1545 current_space->local_used += nzk; 1546 current_space->local_remaining -= nzk; 1547 1548 ui[k+1] = ui[k] + nzk; 1549 } 1550 1551 #if defined(PETSC_USE_INFO) 1552 if (ai[am] != 0) { 1553 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1554 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1555 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1556 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1557 } else { 1558 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1559 } 1560 #endif 1561 1562 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1563 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1564 ierr = PetscFree(jl);CHKERRQ(ierr); 1565 1566 /* destroy list of free space and other temporary array(s) */ 1567 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1568 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1569 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1570 1571 /* put together the new matrix in MATSEQSBAIJ format */ 1572 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1573 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1574 B = *fact; 1575 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1576 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1577 1578 b = (Mat_SeqSBAIJ*)B->data; 1579 b->singlemalloc = PETSC_FALSE; 1580 b->free_a = PETSC_TRUE; 1581 b->free_ij = PETSC_TRUE; 1582 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1583 b->j = uj; 1584 b->i = ui; 1585 b->diag = 0; 1586 b->ilen = 0; 1587 b->imax = 0; 1588 b->row = perm; 1589 b->col = perm; 1590 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1591 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1592 b->icol = iperm; 1593 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1594 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1595 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1596 b->maxnz = b->nz = ui[am]; 1597 1598 B->factor = FACTOR_CHOLESKY; 1599 B->info.factor_mallocs = reallocs; 1600 B->info.fill_ratio_given = fill; 1601 if (ai[am] != 0) { 1602 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1603 } else { 1604 B->info.fill_ratio_needed = 0.0; 1605 } 1606 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1607 if (perm_identity){ 1608 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1609 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1610 } 1611 PetscFunctionReturn(0); 1612 } 1613