1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->rmap.n; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->free_a = PETSC_TRUE; 231 b->free_ij = PETSC_TRUE; 232 b->sorted = PETSC_FALSE; 233 b->singlemalloc = PETSC_FALSE; 234 b->a = new_a; 235 b->j = new_j; 236 b->i = new_i; 237 b->ilen = 0; 238 b->imax = 0; 239 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 240 b->row = isirow; 241 b->col = iscolf; 242 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 243 b->maxnz = b->nz = new_i[n]; 244 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 245 (*fact)->info.factor_mallocs = 0; 246 247 af = ((double)b->nz)/((double)a->nz) + .001; 248 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 249 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 250 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 251 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 252 253 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 254 255 PetscFunctionReturn(0); 256 #endif 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 261 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 262 { 263 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 264 IS isicol; 265 PetscErrorCode ierr; 266 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 267 PetscInt *bi,*bj,*ajtmp; 268 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 269 PetscReal f; 270 PetscInt nlnk,*lnk,k,**bi_ptr; 271 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 272 PetscBT lnkbt; 273 274 PetscFunctionBegin; 275 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 276 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 277 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 278 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 279 280 /* get new row pointers */ 281 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 282 bi[0] = 0; 283 284 /* bdiag is location of diagonal in factor */ 285 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 286 bdiag[0] = 0; 287 288 /* linked list for storing column indices of the active row */ 289 nlnk = n + 1; 290 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 291 292 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 293 294 /* initial FreeSpace size is f*(ai[n]+1) */ 295 f = info->fill; 296 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 297 current_space = free_space; 298 299 for (i=0; i<n; i++) { 300 /* copy previous fill into linked list */ 301 nzi = 0; 302 nnz = ai[r[i]+1] - ai[r[i]]; 303 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 304 ajtmp = aj + ai[r[i]]; 305 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 306 nzi += nlnk; 307 308 /* add pivot rows into linked list */ 309 row = lnk[n]; 310 while (row < i) { 311 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 312 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 313 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 314 nzi += nlnk; 315 row = lnk[row]; 316 } 317 bi[i+1] = bi[i] + nzi; 318 im[i] = nzi; 319 320 /* mark bdiag */ 321 nzbd = 0; 322 nnz = nzi; 323 k = lnk[n]; 324 while (nnz-- && k < i){ 325 nzbd++; 326 k = lnk[k]; 327 } 328 bdiag[i] = bi[i] + nzbd; 329 330 /* if free space is not available, make more free space */ 331 if (current_space->local_remaining<nzi) { 332 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 333 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 334 reallocs++; 335 } 336 337 /* copy data into free space, then initialize lnk */ 338 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 339 bi_ptr[i] = current_space->array; 340 current_space->array += nzi; 341 current_space->local_used += nzi; 342 current_space->local_remaining -= nzi; 343 } 344 #if defined(PETSC_USE_INFO) 345 if (ai[n] != 0) { 346 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 347 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 348 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 349 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 350 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 351 } else { 352 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 353 } 354 #endif 355 356 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 357 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 358 359 /* destroy list of free space and other temporary array(s) */ 360 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 361 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 362 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 363 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 364 365 /* put together the new matrix */ 366 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 367 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 368 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 369 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 370 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 371 b = (Mat_SeqAIJ*)(*B)->data; 372 b->free_a = PETSC_TRUE; 373 b->free_ij = PETSC_TRUE; 374 b->singlemalloc = PETSC_FALSE; 375 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 376 b->j = bj; 377 b->i = bi; 378 b->diag = bdiag; 379 b->ilen = 0; 380 b->imax = 0; 381 b->row = isrow; 382 b->col = iscol; 383 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 384 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 385 b->icol = isicol; 386 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 387 388 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 389 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 390 b->maxnz = b->nz = bi[n] ; 391 392 (*B)->factor = FACTOR_LU; 393 (*B)->info.factor_mallocs = reallocs; 394 (*B)->info.fill_ratio_given = f; 395 396 if (ai[n] != 0) { 397 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 398 } else { 399 (*B)->info.fill_ratio_needed = 0.0; 400 } 401 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 402 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 403 PetscFunctionReturn(0); 404 } 405 406 /* 407 Trouble in factorization, should we dump the original matrix? 408 */ 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatFactorDumpMatrix" 411 PetscErrorCode MatFactorDumpMatrix(Mat A) 412 { 413 PetscErrorCode ierr; 414 PetscTruth flg; 415 416 PetscFunctionBegin; 417 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 418 if (flg) { 419 PetscViewer viewer; 420 char filename[PETSC_MAX_PATH_LEN]; 421 422 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 423 ierr = PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 424 ierr = MatView(A,viewer);CHKERRQ(ierr); 425 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 426 } 427 PetscFunctionReturn(0); 428 } 429 430 /* ----------------------------------------------------------- */ 431 #undef __FUNCT__ 432 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 433 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 434 { 435 Mat C=*B; 436 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 437 IS isrow = b->row,isicol = b->icol; 438 PetscErrorCode ierr; 439 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 440 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 441 PetscInt *diag_offset = b->diag,diag,*pj; 442 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 443 PetscScalar d; 444 PetscReal rs; 445 LUShift_Ctx sctx; 446 PetscInt newshift,*ddiag; 447 448 PetscFunctionBegin; 449 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 450 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 451 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 452 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 453 rtmps = rtmp; ics = ic; 454 455 sctx.shift_top = 0; 456 sctx.nshift_max = 0; 457 sctx.shift_lo = 0; 458 sctx.shift_hi = 0; 459 460 /* if both shift schemes are chosen by user, only use info->shiftpd */ 461 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 462 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 463 PetscInt *aai = a->i; 464 ddiag = a->diag; 465 sctx.shift_top = 0; 466 for (i=0; i<n; i++) { 467 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 468 d = (a->a)[ddiag[i]]; 469 rs = -PetscAbsScalar(d) - PetscRealPart(d); 470 v = a->a+aai[i]; 471 nz = aai[i+1] - aai[i]; 472 for (j=0; j<nz; j++) 473 rs += PetscAbsScalar(v[j]); 474 if (rs>sctx.shift_top) sctx.shift_top = rs; 475 } 476 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 477 sctx.shift_top *= 1.1; 478 sctx.nshift_max = 5; 479 sctx.shift_lo = 0.; 480 sctx.shift_hi = 1.; 481 } 482 483 sctx.shift_amount = 0; 484 sctx.nshift = 0; 485 do { 486 sctx.lushift = PETSC_FALSE; 487 for (i=0; i<n; i++){ 488 nz = bi[i+1] - bi[i]; 489 bjtmp = bj + bi[i]; 490 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 491 492 /* load in initial (unfactored row) */ 493 nz = a->i[r[i]+1] - a->i[r[i]]; 494 ajtmp = a->j + a->i[r[i]]; 495 v = a->a + a->i[r[i]]; 496 for (j=0; j<nz; j++) { 497 rtmp[ics[ajtmp[j]]] = v[j]; 498 } 499 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 500 501 row = *bjtmp++; 502 while (row < i) { 503 pc = rtmp + row; 504 if (*pc != 0.0) { 505 pv = b->a + diag_offset[row]; 506 pj = b->j + diag_offset[row] + 1; 507 multiplier = *pc / *pv++; 508 *pc = multiplier; 509 nz = bi[row+1] - diag_offset[row] - 1; 510 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 511 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 512 } 513 row = *bjtmp++; 514 } 515 /* finished row so stick it into b->a */ 516 pv = b->a + bi[i] ; 517 pj = b->j + bi[i] ; 518 nz = bi[i+1] - bi[i]; 519 diag = diag_offset[i] - bi[i]; 520 rs = 0.0; 521 for (j=0; j<nz; j++) { 522 pv[j] = rtmps[pj[j]]; 523 if (j != diag) rs += PetscAbsScalar(pv[j]); 524 } 525 526 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 527 sctx.rs = rs; 528 sctx.pv = pv[diag]; 529 ierr = MatLUCheckShift_inline(info,sctx,i,a->diag,newshift);CHKERRQ(ierr); 530 if (newshift == 1) break; 531 } 532 533 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 534 /* 535 * if no shift in this attempt & shifting & started shifting & can refine, 536 * then try lower shift 537 */ 538 sctx.shift_hi = info->shift_fraction; 539 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 540 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 541 sctx.lushift = PETSC_TRUE; 542 sctx.nshift++; 543 } 544 } while (sctx.lushift); 545 546 /* invert diagonal entries for simplier triangular solves */ 547 for (i=0; i<n; i++) { 548 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 549 } 550 551 ierr = PetscFree(rtmp);CHKERRQ(ierr); 552 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 553 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 554 C->factor = FACTOR_LU; 555 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 556 C->assembled = PETSC_TRUE; 557 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 558 if (sctx.nshift){ 559 if (info->shiftnz) { 560 ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 561 } else if (info->shiftpd) { 562 ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 563 } 564 } 565 PetscFunctionReturn(0); 566 } 567 568 #undef __FUNCT__ 569 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 570 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 571 { 572 PetscFunctionBegin; 573 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 574 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 575 PetscFunctionReturn(0); 576 } 577 578 579 /* ----------------------------------------------------------- */ 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatLUFactor_SeqAIJ" 582 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 583 { 584 PetscErrorCode ierr; 585 Mat C; 586 587 PetscFunctionBegin; 588 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 589 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 590 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 591 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 592 PetscFunctionReturn(0); 593 } 594 /* ----------------------------------------------------------- */ 595 #undef __FUNCT__ 596 #define __FUNCT__ "MatSolve_SeqAIJ" 597 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 598 { 599 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 600 IS iscol = a->col,isrow = a->row; 601 PetscErrorCode ierr; 602 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 603 PetscInt nz,*rout,*cout; 604 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 605 606 PetscFunctionBegin; 607 if (!n) PetscFunctionReturn(0); 608 609 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 610 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 611 tmp = a->solve_work; 612 613 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 614 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 615 616 /* forward solve the lower triangular */ 617 tmp[0] = b[*r++]; 618 tmps = tmp; 619 for (i=1; i<n; i++) { 620 v = aa + ai[i] ; 621 vi = aj + ai[i] ; 622 nz = a->diag[i] - ai[i]; 623 sum = b[*r++]; 624 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 625 tmp[i] = sum; 626 } 627 628 /* backward solve the upper triangular */ 629 for (i=n-1; i>=0; i--){ 630 v = aa + a->diag[i] + 1; 631 vi = aj + a->diag[i] + 1; 632 nz = ai[i+1] - a->diag[i] - 1; 633 sum = tmp[i]; 634 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 635 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 636 } 637 638 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 639 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 640 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 641 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 642 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 643 PetscFunctionReturn(0); 644 } 645 646 #undef __FUNCT__ 647 #define __FUNCT__ "MatMatSolve_SeqAIJ" 648 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 649 { 650 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 651 IS iscol = a->col,isrow = a->row; 652 PetscErrorCode ierr; 653 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 654 PetscInt nz,*rout,*cout,neq; 655 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 656 657 PetscFunctionBegin; 658 if (!n) PetscFunctionReturn(0); 659 660 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 661 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 662 663 tmp = a->solve_work; 664 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 665 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 666 667 for (neq=0; neq<n; neq++){ 668 /* forward solve the lower triangular */ 669 tmp[0] = b[r[0]]; 670 tmps = tmp; 671 for (i=1; i<n; i++) { 672 v = aa + ai[i] ; 673 vi = aj + ai[i] ; 674 nz = a->diag[i] - ai[i]; 675 sum = b[r[i]]; 676 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 677 tmp[i] = sum; 678 } 679 /* backward solve the upper triangular */ 680 for (i=n-1; i>=0; i--){ 681 v = aa + a->diag[i] + 1; 682 vi = aj + a->diag[i] + 1; 683 nz = ai[i+1] - a->diag[i] - 1; 684 sum = tmp[i]; 685 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 686 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 687 } 688 689 b += n; 690 x += n; 691 } 692 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 693 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 694 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 695 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 696 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 697 PetscFunctionReturn(0); 698 } 699 700 /* ----------------------------------------------------------- */ 701 #undef __FUNCT__ 702 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 703 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 704 { 705 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 706 PetscErrorCode ierr; 707 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 708 PetscScalar *x,*b,*aa = a->a; 709 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 710 PetscInt adiag_i,i,*vi,nz,ai_i; 711 PetscScalar *v,sum; 712 #endif 713 714 PetscFunctionBegin; 715 if (!n) PetscFunctionReturn(0); 716 717 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 718 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 719 720 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 721 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 722 #else 723 /* forward solve the lower triangular */ 724 x[0] = b[0]; 725 for (i=1; i<n; i++) { 726 ai_i = ai[i]; 727 v = aa + ai_i; 728 vi = aj + ai_i; 729 nz = adiag[i] - ai_i; 730 sum = b[i]; 731 while (nz--) sum -= *v++ * x[*vi++]; 732 x[i] = sum; 733 } 734 735 /* backward solve the upper triangular */ 736 for (i=n-1; i>=0; i--){ 737 adiag_i = adiag[i]; 738 v = aa + adiag_i + 1; 739 vi = aj + adiag_i + 1; 740 nz = ai[i+1] - adiag_i - 1; 741 sum = x[i]; 742 while (nz--) sum -= *v++ * x[*vi++]; 743 x[i] = sum*aa[adiag_i]; 744 } 745 #endif 746 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 747 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 748 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 749 PetscFunctionReturn(0); 750 } 751 752 #undef __FUNCT__ 753 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 754 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 755 { 756 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 757 IS iscol = a->col,isrow = a->row; 758 PetscErrorCode ierr; 759 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 760 PetscInt nz,*rout,*cout; 761 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 762 763 PetscFunctionBegin; 764 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 765 766 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 767 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 768 tmp = a->solve_work; 769 770 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 771 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 772 773 /* forward solve the lower triangular */ 774 tmp[0] = b[*r++]; 775 for (i=1; i<n; i++) { 776 v = aa + ai[i] ; 777 vi = aj + ai[i] ; 778 nz = a->diag[i] - ai[i]; 779 sum = b[*r++]; 780 while (nz--) sum -= *v++ * tmp[*vi++ ]; 781 tmp[i] = sum; 782 } 783 784 /* backward solve the upper triangular */ 785 for (i=n-1; i>=0; i--){ 786 v = aa + a->diag[i] + 1; 787 vi = aj + a->diag[i] + 1; 788 nz = ai[i+1] - a->diag[i] - 1; 789 sum = tmp[i]; 790 while (nz--) sum -= *v++ * tmp[*vi++ ]; 791 tmp[i] = sum*aa[a->diag[i]]; 792 x[*c--] += tmp[i]; 793 } 794 795 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 796 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 797 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 798 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 799 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 800 801 PetscFunctionReturn(0); 802 } 803 /* -------------------------------------------------------------------*/ 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 806 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 807 { 808 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 809 IS iscol = a->col,isrow = a->row; 810 PetscErrorCode ierr; 811 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 812 PetscInt nz,*rout,*cout,*diag = a->diag; 813 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 814 815 PetscFunctionBegin; 816 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 817 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 818 tmp = a->solve_work; 819 820 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 821 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 822 823 /* copy the b into temp work space according to permutation */ 824 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 825 826 /* forward solve the U^T */ 827 for (i=0; i<n; i++) { 828 v = aa + diag[i] ; 829 vi = aj + diag[i] + 1; 830 nz = ai[i+1] - diag[i] - 1; 831 s1 = tmp[i]; 832 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 833 while (nz--) { 834 tmp[*vi++ ] -= (*v++)*s1; 835 } 836 tmp[i] = s1; 837 } 838 839 /* backward solve the L^T */ 840 for (i=n-1; i>=0; i--){ 841 v = aa + diag[i] - 1 ; 842 vi = aj + diag[i] - 1 ; 843 nz = diag[i] - ai[i]; 844 s1 = tmp[i]; 845 while (nz--) { 846 tmp[*vi-- ] -= (*v--)*s1; 847 } 848 } 849 850 /* copy tmp into x according to permutation */ 851 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 852 853 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 854 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 855 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 856 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 857 858 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 #undef __FUNCT__ 863 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 864 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 865 { 866 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 867 IS iscol = a->col,isrow = a->row; 868 PetscErrorCode ierr; 869 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 870 PetscInt nz,*rout,*cout,*diag = a->diag; 871 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 872 873 PetscFunctionBegin; 874 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 875 876 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 877 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 878 tmp = a->solve_work; 879 880 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 881 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 882 883 /* copy the b into temp work space according to permutation */ 884 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 885 886 /* forward solve the U^T */ 887 for (i=0; i<n; i++) { 888 v = aa + diag[i] ; 889 vi = aj + diag[i] + 1; 890 nz = ai[i+1] - diag[i] - 1; 891 tmp[i] *= *v++; 892 while (nz--) { 893 tmp[*vi++ ] -= (*v++)*tmp[i]; 894 } 895 } 896 897 /* backward solve the L^T */ 898 for (i=n-1; i>=0; i--){ 899 v = aa + diag[i] - 1 ; 900 vi = aj + diag[i] - 1 ; 901 nz = diag[i] - ai[i]; 902 while (nz--) { 903 tmp[*vi-- ] -= (*v--)*tmp[i]; 904 } 905 } 906 907 /* copy tmp into x according to permutation */ 908 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 909 910 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 911 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 912 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 913 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 914 915 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 916 PetscFunctionReturn(0); 917 } 918 /* ----------------------------------------------------------------*/ 919 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat,PetscTruth*,PetscInt*); 920 921 #undef __FUNCT__ 922 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 923 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 924 { 925 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 926 IS isicol; 927 PetscErrorCode ierr; 928 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d; 929 PetscInt *bi,*cols,nnz,*cols_lvl; 930 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 931 PetscInt i,levels,diagonal_fill; 932 PetscTruth col_identity,row_identity; 933 PetscReal f; 934 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 935 PetscBT lnkbt; 936 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 937 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 938 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 939 PetscTruth missing; 940 941 PetscFunctionBegin; 942 f = info->fill; 943 levels = (PetscInt)info->levels; 944 diagonal_fill = (PetscInt)info->diagonal_fill; 945 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 946 947 /* special case that simply copies fill pattern */ 948 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 949 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 950 if (!levels && row_identity && col_identity) { 951 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 952 (*fact)->factor = FACTOR_LU; 953 (*fact)->info.factor_mallocs = 0; 954 (*fact)->info.fill_ratio_given = info->fill; 955 (*fact)->info.fill_ratio_needed = 1.0; 956 b = (Mat_SeqAIJ*)(*fact)->data; 957 ierr = MatMissingDiagonal_SeqAIJ(*fact,&missing,&d);CHKERRQ(ierr); 958 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 959 b->row = isrow; 960 b->col = iscol; 961 b->icol = isicol; 962 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 963 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 964 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 965 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 970 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 971 972 /* get new row pointers */ 973 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 974 bi[0] = 0; 975 /* bdiag is location of diagonal in factor */ 976 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 977 bdiag[0] = 0; 978 979 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 980 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 981 982 /* create a linked list for storing column indices of the active row */ 983 nlnk = n + 1; 984 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 985 986 /* initial FreeSpace size is f*(ai[n]+1) */ 987 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 988 current_space = free_space; 989 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 990 current_space_lvl = free_space_lvl; 991 992 for (i=0; i<n; i++) { 993 nzi = 0; 994 /* copy current row into linked list */ 995 nnz = ai[r[i]+1] - ai[r[i]]; 996 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 997 cols = aj + ai[r[i]]; 998 lnk[i] = -1; /* marker to indicate if diagonal exists */ 999 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1000 nzi += nlnk; 1001 1002 /* make sure diagonal entry is included */ 1003 if (diagonal_fill && lnk[i] == -1) { 1004 fm = n; 1005 while (lnk[fm] < i) fm = lnk[fm]; 1006 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1007 lnk[fm] = i; 1008 lnk_lvl[i] = 0; 1009 nzi++; dcount++; 1010 } 1011 1012 /* add pivot rows into the active row */ 1013 nzbd = 0; 1014 prow = lnk[n]; 1015 while (prow < i) { 1016 nnz = bdiag[prow]; 1017 cols = bj_ptr[prow] + nnz + 1; 1018 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1019 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1020 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1021 nzi += nlnk; 1022 prow = lnk[prow]; 1023 nzbd++; 1024 } 1025 bdiag[i] = nzbd; 1026 bi[i+1] = bi[i] + nzi; 1027 1028 /* if free space is not available, make more free space */ 1029 if (current_space->local_remaining<nzi) { 1030 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1031 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1032 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1033 reallocs++; 1034 } 1035 1036 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1037 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1038 bj_ptr[i] = current_space->array; 1039 bjlvl_ptr[i] = current_space_lvl->array; 1040 1041 /* make sure the active row i has diagonal entry */ 1042 if (*(bj_ptr[i]+bdiag[i]) != i) { 1043 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1044 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1045 } 1046 1047 current_space->array += nzi; 1048 current_space->local_used += nzi; 1049 current_space->local_remaining -= nzi; 1050 current_space_lvl->array += nzi; 1051 current_space_lvl->local_used += nzi; 1052 current_space_lvl->local_remaining -= nzi; 1053 } 1054 1055 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1056 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1057 1058 /* destroy list of free space and other temporary arrays */ 1059 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1060 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1061 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1062 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1063 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1064 1065 #if defined(PETSC_USE_INFO) 1066 { 1067 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1068 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1069 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1070 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1071 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1072 if (diagonal_fill) { 1073 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1074 } 1075 } 1076 #endif 1077 1078 /* put together the new matrix */ 1079 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1080 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1081 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1082 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1083 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1084 b = (Mat_SeqAIJ*)(*fact)->data; 1085 b->free_a = PETSC_TRUE; 1086 b->free_ij = PETSC_TRUE; 1087 b->singlemalloc = PETSC_FALSE; 1088 len = (bi[n] )*sizeof(PetscScalar); 1089 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1090 b->j = bj; 1091 b->i = bi; 1092 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1093 b->diag = bdiag; 1094 b->ilen = 0; 1095 b->imax = 0; 1096 b->row = isrow; 1097 b->col = iscol; 1098 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1099 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1100 b->icol = isicol; 1101 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1102 /* In b structure: Free imax, ilen, old a, old j. 1103 Allocate bdiag, solve_work, new a, new j */ 1104 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1105 b->maxnz = b->nz = bi[n] ; 1106 (*fact)->factor = FACTOR_LU; 1107 (*fact)->info.factor_mallocs = reallocs; 1108 (*fact)->info.fill_ratio_given = f; 1109 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1110 1111 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1112 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1113 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #include "src/mat/impls/sbaij/seq/sbaij.h" 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1120 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1121 { 1122 Mat C = *B; 1123 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1124 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1125 IS ip=b->row; 1126 PetscErrorCode ierr; 1127 PetscInt *rip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1128 PetscInt *ai=a->i,*aj=a->j; 1129 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1130 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1131 PetscReal zeropivot,rs,shiftnz; 1132 PetscReal shiftpd; 1133 ChShift_Ctx sctx; 1134 PetscInt newshift; 1135 1136 PetscFunctionBegin; 1137 shiftnz = info->shiftnz; 1138 shiftpd = info->shiftpd; 1139 zeropivot = info->zeropivot; 1140 1141 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1142 1143 /* initialization */ 1144 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1145 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1146 jl = il + mbs; 1147 rtmp = (MatScalar*)(jl + mbs); 1148 1149 sctx.shift_amount = 0; 1150 sctx.nshift = 0; 1151 do { 1152 sctx.chshift = PETSC_FALSE; 1153 for (i=0; i<mbs; i++) { 1154 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1155 } 1156 1157 for (k = 0; k<mbs; k++){ 1158 bval = ba + bi[k]; 1159 /* initialize k-th row by the perm[k]-th row of A */ 1160 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1161 for (j = jmin; j < jmax; j++){ 1162 col = rip[aj[j]]; 1163 if (col >= k){ /* only take upper triangular entry */ 1164 rtmp[col] = aa[j]; 1165 *bval++ = 0.0; /* for in-place factorization */ 1166 } 1167 } 1168 /* shift the diagonal of the matrix */ 1169 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1170 1171 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1172 dk = rtmp[k]; 1173 i = jl[k]; /* first row to be added to k_th row */ 1174 1175 while (i < k){ 1176 nexti = jl[i]; /* next row to be added to k_th row */ 1177 1178 /* compute multiplier, update diag(k) and U(i,k) */ 1179 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1180 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1181 dk += uikdi*ba[ili]; 1182 ba[ili] = uikdi; /* -U(i,k) */ 1183 1184 /* add multiple of row i to k-th row */ 1185 jmin = ili + 1; jmax = bi[i+1]; 1186 if (jmin < jmax){ 1187 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1188 /* update il and jl for row i */ 1189 il[i] = jmin; 1190 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1191 } 1192 i = nexti; 1193 } 1194 1195 /* shift the diagonals when zero pivot is detected */ 1196 /* compute rs=sum of abs(off-diagonal) */ 1197 rs = 0.0; 1198 jmin = bi[k]+1; 1199 nz = bi[k+1] - jmin; 1200 if (nz){ 1201 bcol = bj + jmin; 1202 while (nz--){ 1203 rs += PetscAbsScalar(rtmp[*bcol]); 1204 bcol++; 1205 } 1206 } 1207 1208 sctx.rs = rs; 1209 sctx.pv = dk; 1210 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1211 if (newshift == 1) break; 1212 1213 /* copy data into U(k,:) */ 1214 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1215 jmin = bi[k]+1; jmax = bi[k+1]; 1216 if (jmin < jmax) { 1217 for (j=jmin; j<jmax; j++){ 1218 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1219 } 1220 /* add the k-th row into il and jl */ 1221 il[k] = jmin; 1222 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1223 } 1224 } 1225 } while (sctx.chshift); 1226 ierr = PetscFree(il);CHKERRQ(ierr); 1227 1228 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1229 C->factor = FACTOR_CHOLESKY; 1230 C->assembled = PETSC_TRUE; 1231 C->preallocated = PETSC_TRUE; 1232 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1233 if (sctx.nshift){ 1234 if (shiftnz) { 1235 ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1236 } else if (shiftpd) { 1237 ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1238 } 1239 } 1240 PetscFunctionReturn(0); 1241 } 1242 1243 #undef __FUNCT__ 1244 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1245 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1246 { 1247 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1248 Mat_SeqSBAIJ *b; 1249 Mat B; 1250 PetscErrorCode ierr; 1251 PetscTruth perm_identity; 1252 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1253 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1254 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1255 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1256 PetscReal fill=info->fill,levels=info->levels; 1257 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1258 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1259 PetscBT lnkbt; 1260 1261 PetscFunctionBegin; 1262 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1263 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1264 1265 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1266 ui[0] = 0; 1267 1268 /* special case that simply copies fill pattern */ 1269 if (!levels && perm_identity) { 1270 for (i=0; i<am; i++) { 1271 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1272 } 1273 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1274 cols = uj; 1275 for (i=0; i<am; i++) { 1276 aj = a->j + a->diag[i]; 1277 ncols = ui[i+1] - ui[i]; 1278 for (j=0; j<ncols; j++) *cols++ = *aj++; 1279 } 1280 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1281 /* initialization */ 1282 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1283 1284 /* jl: linked list for storing indices of the pivot rows 1285 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1286 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1287 il = jl + am; 1288 uj_ptr = (PetscInt**)(il + am); 1289 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1290 for (i=0; i<am; i++){ 1291 jl[i] = am; il[i] = 0; 1292 } 1293 1294 /* create and initialize a linked list for storing column indices of the active row k */ 1295 nlnk = am + 1; 1296 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1297 1298 /* initial FreeSpace size is fill*(ai[am]+1) */ 1299 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1300 current_space = free_space; 1301 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1302 current_space_lvl = free_space_lvl; 1303 1304 for (k=0; k<am; k++){ /* for each active row k */ 1305 /* initialize lnk by the column indices of row rip[k] of A */ 1306 nzk = 0; 1307 ncols = ai[rip[k]+1] - ai[rip[k]]; 1308 ncols_upper = 0; 1309 for (j=0; j<ncols; j++){ 1310 i = *(aj + ai[rip[k]] + j); 1311 if (rip[i] >= k){ /* only take upper triangular entry */ 1312 ajtmp[ncols_upper] = i; 1313 ncols_upper++; 1314 } 1315 } 1316 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1317 nzk += nlnk; 1318 1319 /* update lnk by computing fill-in for each pivot row to be merged in */ 1320 prow = jl[k]; /* 1st pivot row */ 1321 1322 while (prow < k){ 1323 nextprow = jl[prow]; 1324 1325 /* merge prow into k-th row */ 1326 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1327 jmax = ui[prow+1]; 1328 ncols = jmax-jmin; 1329 i = jmin - ui[prow]; 1330 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1331 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1332 j = *(uj - 1); 1333 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1334 nzk += nlnk; 1335 1336 /* update il and jl for prow */ 1337 if (jmin < jmax){ 1338 il[prow] = jmin; 1339 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1340 } 1341 prow = nextprow; 1342 } 1343 1344 /* if free space is not available, make more free space */ 1345 if (current_space->local_remaining<nzk) { 1346 i = am - k + 1; /* num of unfactored rows */ 1347 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1348 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1349 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1350 reallocs++; 1351 } 1352 1353 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1354 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1355 1356 /* add the k-th row into il and jl */ 1357 if (nzk > 1){ 1358 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1359 jl[k] = jl[i]; jl[i] = k; 1360 il[k] = ui[k] + 1; 1361 } 1362 uj_ptr[k] = current_space->array; 1363 uj_lvl_ptr[k] = current_space_lvl->array; 1364 1365 current_space->array += nzk; 1366 current_space->local_used += nzk; 1367 current_space->local_remaining -= nzk; 1368 1369 current_space_lvl->array += nzk; 1370 current_space_lvl->local_used += nzk; 1371 current_space_lvl->local_remaining -= nzk; 1372 1373 ui[k+1] = ui[k] + nzk; 1374 } 1375 1376 #if defined(PETSC_USE_INFO) 1377 if (ai[am] != 0) { 1378 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1379 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1380 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1381 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1382 } else { 1383 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1384 } 1385 #endif 1386 1387 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1388 ierr = PetscFree(jl);CHKERRQ(ierr); 1389 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1390 1391 /* destroy list of free space and other temporary array(s) */ 1392 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1393 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1394 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1395 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1396 1397 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1398 1399 /* put together the new matrix in MATSEQSBAIJ format */ 1400 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1401 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1402 B = *fact; 1403 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1404 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1405 1406 b = (Mat_SeqSBAIJ*)B->data; 1407 b->singlemalloc = PETSC_FALSE; 1408 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1409 b->j = uj; 1410 b->i = ui; 1411 b->diag = 0; 1412 b->ilen = 0; 1413 b->imax = 0; 1414 b->row = perm; 1415 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1416 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1417 b->icol = perm; 1418 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1419 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1420 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1421 b->maxnz = b->nz = ui[am]; 1422 b->free_a = PETSC_TRUE; 1423 b->free_ij = PETSC_TRUE; 1424 1425 B->factor = FACTOR_CHOLESKY; 1426 B->info.factor_mallocs = reallocs; 1427 B->info.fill_ratio_given = fill; 1428 if (ai[am] != 0) { 1429 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1430 } else { 1431 B->info.fill_ratio_needed = 0.0; 1432 } 1433 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1434 if (perm_identity){ 1435 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1436 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1437 } 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1443 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1444 { 1445 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1446 Mat_SeqSBAIJ *b; 1447 Mat B; 1448 PetscErrorCode ierr; 1449 PetscTruth perm_identity; 1450 PetscReal fill = info->fill; 1451 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1452 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1453 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1454 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1455 PetscBT lnkbt; 1456 IS iperm; 1457 1458 PetscFunctionBegin; 1459 /* check whether perm is the identity mapping */ 1460 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1461 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1462 1463 if (!perm_identity){ 1464 /* check if perm is symmetric! */ 1465 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1466 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1467 for (i=0; i<am; i++) { 1468 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1469 } 1470 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1471 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1472 } 1473 1474 /* initialization */ 1475 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1476 ui[0] = 0; 1477 1478 /* jl: linked list for storing indices of the pivot rows 1479 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1480 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1481 il = jl + am; 1482 cols = il + am; 1483 ui_ptr = (PetscInt**)(cols + am); 1484 for (i=0; i<am; i++){ 1485 jl[i] = am; il[i] = 0; 1486 } 1487 1488 /* create and initialize a linked list for storing column indices of the active row k */ 1489 nlnk = am + 1; 1490 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1491 1492 /* initial FreeSpace size is fill*(ai[am]+1) */ 1493 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1494 current_space = free_space; 1495 1496 for (k=0; k<am; k++){ /* for each active row k */ 1497 /* initialize lnk by the column indices of row rip[k] of A */ 1498 nzk = 0; 1499 ncols = ai[rip[k]+1] - ai[rip[k]]; 1500 ncols_upper = 0; 1501 for (j=0; j<ncols; j++){ 1502 i = rip[*(aj + ai[rip[k]] + j)]; 1503 if (i >= k){ /* only take upper triangular entry */ 1504 cols[ncols_upper] = i; 1505 ncols_upper++; 1506 } 1507 } 1508 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1509 nzk += nlnk; 1510 1511 /* update lnk by computing fill-in for each pivot row to be merged in */ 1512 prow = jl[k]; /* 1st pivot row */ 1513 1514 while (prow < k){ 1515 nextprow = jl[prow]; 1516 /* merge prow into k-th row */ 1517 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1518 jmax = ui[prow+1]; 1519 ncols = jmax-jmin; 1520 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1521 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1522 nzk += nlnk; 1523 1524 /* update il and jl for prow */ 1525 if (jmin < jmax){ 1526 il[prow] = jmin; 1527 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1528 } 1529 prow = nextprow; 1530 } 1531 1532 /* if free space is not available, make more free space */ 1533 if (current_space->local_remaining<nzk) { 1534 i = am - k + 1; /* num of unfactored rows */ 1535 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1536 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1537 reallocs++; 1538 } 1539 1540 /* copy data into free space, then initialize lnk */ 1541 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1542 1543 /* add the k-th row into il and jl */ 1544 if (nzk-1 > 0){ 1545 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1546 jl[k] = jl[i]; jl[i] = k; 1547 il[k] = ui[k] + 1; 1548 } 1549 ui_ptr[k] = current_space->array; 1550 current_space->array += nzk; 1551 current_space->local_used += nzk; 1552 current_space->local_remaining -= nzk; 1553 1554 ui[k+1] = ui[k] + nzk; 1555 } 1556 1557 #if defined(PETSC_USE_INFO) 1558 if (ai[am] != 0) { 1559 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1560 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1561 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1562 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1563 } else { 1564 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1565 } 1566 #endif 1567 1568 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1569 ierr = PetscFree(jl);CHKERRQ(ierr); 1570 1571 /* destroy list of free space and other temporary array(s) */ 1572 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1573 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1574 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1575 1576 /* put together the new matrix in MATSEQSBAIJ format */ 1577 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1578 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1579 B = *fact; 1580 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1581 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1582 1583 b = (Mat_SeqSBAIJ*)B->data; 1584 b->singlemalloc = PETSC_FALSE; 1585 b->free_a = PETSC_TRUE; 1586 b->free_ij = PETSC_TRUE; 1587 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1588 b->j = uj; 1589 b->i = ui; 1590 b->diag = 0; 1591 b->ilen = 0; 1592 b->imax = 0; 1593 b->row = perm; 1594 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1595 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1596 b->icol = perm; 1597 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1598 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1599 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1600 b->maxnz = b->nz = ui[am]; 1601 1602 B->factor = FACTOR_CHOLESKY; 1603 B->info.factor_mallocs = reallocs; 1604 B->info.fill_ratio_given = fill; 1605 if (ai[am] != 0) { 1606 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1607 } else { 1608 B->info.fill_ratio_needed = 0.0; 1609 } 1610 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1611 if (perm_identity){ 1612 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1613 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1614 } 1615 PetscFunctionReturn(0); 1616 } 1617