1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/seq/aij.h" 4 #include "../src/inline/dot.h" 5 #include "../src/inline/spops.h" 6 #include "petscbt.h" 7 #include "../src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 const PetscInt *c,*r,*ic; 64 PetscInt i,n = A->rmap->n,*cc,*cr; 65 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 66 PetscInt *ordcol,*iwk,*iperm,*jw; 67 PetscInt jmax,lfill,job,*o_i,*o_j; 68 MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 69 PetscReal af,dt,dtcount,dtcol,fill; 70 71 PetscFunctionBegin; 72 73 if (info->dt == PETSC_DEFAULT) dt = .005; else dt = info->dt; 74 if (info->dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); else dtcount = info->dtcount; 75 if (info->dtcol == PETSC_DEFAULT) dtcol = .01; else dtcol = info->dtcol; 76 if (info->fill == PETSC_DEFAULT) fill = ((double)(n*(dtcount+1)))/a->nz; else fill = info->fill; 77 lfill = (PetscInt)(dtcount/2.0); 78 jmax = (PetscInt)(fill*a->nz); 79 80 81 /* ------------------------------------------------------------ 82 If reorder=.TRUE., then the original matrix has to be 83 reordered to reflect the user selected ordering scheme, and 84 then de-reordered so it is in it's original format. 85 Because Saad's dperm() is NOT in place, we have to copy 86 the original matrix and allocate more storage. . . 87 ------------------------------------------------------------ 88 */ 89 90 /* set reorder to true if either isrow or iscol is not identity */ 91 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 92 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 93 reorder = PetscNot(reorder); 94 95 96 /* storage for ilu factor */ 97 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr); 100 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 101 102 /* ------------------------------------------------------------ 103 Make sure that everything is Fortran formatted (1-Based) 104 ------------------------------------------------------------ 105 */ 106 for (i=old_i[0];i<old_i[n];i++) { 107 old_j[i]++; 108 } 109 for(i=0;i<n+1;i++) { 110 old_i[i]++; 111 }; 112 113 114 if (reorder) { 115 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 116 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 117 ierr = PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 cr[i] = r[i]+1; 120 cc[i] = c[i]+1; 121 } 122 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 123 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 124 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 125 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 126 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr); 127 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job); 128 ierr = PetscFree2(cc,cr);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)dt,&dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 148 if (sierr) { 149 switch (sierr) { 150 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger fill current fill %G space allocated %D",fill,jmax); 151 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger fill current fill %G space allocated %D",fill,jmax); 152 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal fill value %D",jmax); 155 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 222 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 223 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 225 (*fact)->factor = MAT_FACTOR_LU; 226 (*fact)->assembled = PETSC_TRUE; 227 228 b = (Mat_SeqAIJ*)(*fact)->data; 229 b->free_a = PETSC_TRUE; 230 b->free_ij = PETSC_TRUE; 231 b->singlemalloc = PETSC_FALSE; 232 b->a = new_a; 233 b->j = new_j; 234 b->i = new_i; 235 b->ilen = 0; 236 b->imax = 0; 237 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 238 b->row = isirow; 239 b->col = iscolf; 240 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 241 b->maxnz = b->nz = new_i[n]; 242 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 243 (*fact)->info.factor_mallocs = 0; 244 245 af = ((double)b->nz)/((double)a->nz) + .001; 246 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",fill,af);CHKERRQ(ierr); 247 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 249 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 250 251 if (reorder) (*fact)->ops->solve = MatSolve_SeqAIJ; 252 else (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 253 (*fact)->ops->solveadd = MatSolveAdd_SeqAIJ; 254 (*fact)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 255 (*fact)->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 256 257 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 258 259 PetscFunctionReturn(0); 260 #endif 261 } 262 263 EXTERN_C_BEGIN 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 266 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 267 { 268 PetscFunctionBegin; 269 *flg = PETSC_TRUE; 270 PetscFunctionReturn(0); 271 } 272 EXTERN_C_END 273 274 EXTERN_C_BEGIN 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 277 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 278 { 279 PetscInt n = A->rmap->n; 280 PetscErrorCode ierr; 281 282 PetscFunctionBegin; 283 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 284 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 285 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 286 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 287 if (ftype == MAT_FACTOR_ILU) { 288 (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ; 289 } else { 290 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 291 } 292 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 293 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 294 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 295 if (ftype == MAT_FACTOR_ICC) { 296 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 297 } else { 298 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 299 } 300 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 301 (*B)->factor = ftype; 302 PetscFunctionReturn(0); 303 } 304 EXTERN_C_END 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 308 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 309 { 310 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 311 IS isicol; 312 PetscErrorCode ierr; 313 const PetscInt *r,*ic; 314 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 315 PetscInt *bi,*bj,*ajtmp; 316 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 317 PetscReal f; 318 PetscInt nlnk,*lnk,k,**bi_ptr; 319 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 320 PetscBT lnkbt; 321 322 PetscFunctionBegin; 323 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 324 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 325 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 326 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 327 328 /* get new row pointers */ 329 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 330 bi[0] = 0; 331 332 /* bdiag is location of diagonal in factor */ 333 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 334 bdiag[0] = 0; 335 336 /* linked list for storing column indices of the active row */ 337 nlnk = n + 1; 338 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 339 340 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 341 342 /* initial FreeSpace size is f*(ai[n]+1) */ 343 f = info->fill; 344 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 345 current_space = free_space; 346 347 for (i=0; i<n; i++) { 348 /* copy previous fill into linked list */ 349 nzi = 0; 350 nnz = ai[r[i]+1] - ai[r[i]]; 351 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 352 ajtmp = aj + ai[r[i]]; 353 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 354 nzi += nlnk; 355 356 /* add pivot rows into linked list */ 357 row = lnk[n]; 358 while (row < i) { 359 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 360 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 361 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 362 nzi += nlnk; 363 row = lnk[row]; 364 } 365 bi[i+1] = bi[i] + nzi; 366 im[i] = nzi; 367 368 /* mark bdiag */ 369 nzbd = 0; 370 nnz = nzi; 371 k = lnk[n]; 372 while (nnz-- && k < i){ 373 nzbd++; 374 k = lnk[k]; 375 } 376 bdiag[i] = bi[i] + nzbd; 377 378 /* if free space is not available, make more free space */ 379 if (current_space->local_remaining<nzi) { 380 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 381 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 382 reallocs++; 383 } 384 385 /* copy data into free space, then initialize lnk */ 386 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 387 bi_ptr[i] = current_space->array; 388 current_space->array += nzi; 389 current_space->local_used += nzi; 390 current_space->local_remaining -= nzi; 391 } 392 #if defined(PETSC_USE_INFO) 393 if (ai[n] != 0) { 394 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 395 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 396 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 397 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 398 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 399 } else { 400 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 401 } 402 #endif 403 404 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 405 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 406 407 /* destroy list of free space and other temporary array(s) */ 408 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 409 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 410 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 411 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 412 413 /* put together the new matrix */ 414 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 415 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 416 b = (Mat_SeqAIJ*)(B)->data; 417 b->free_a = PETSC_TRUE; 418 b->free_ij = PETSC_TRUE; 419 b->singlemalloc = PETSC_FALSE; 420 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 421 b->j = bj; 422 b->i = bi; 423 b->diag = bdiag; 424 b->ilen = 0; 425 b->imax = 0; 426 b->row = isrow; 427 b->col = iscol; 428 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 429 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 430 b->icol = isicol; 431 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 432 433 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 434 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 435 b->maxnz = b->nz = bi[n] ; 436 437 (B)->factor = MAT_FACTOR_LU; 438 (B)->info.factor_mallocs = reallocs; 439 (B)->info.fill_ratio_given = f; 440 441 if (ai[n]) { 442 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 443 } else { 444 (B)->info.fill_ratio_needed = 0.0; 445 } 446 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 447 (B)->ops->solve = MatSolve_SeqAIJ; 448 (B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 449 /* switch to inodes if appropriate */ 450 ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr); 451 PetscFunctionReturn(0); 452 } 453 454 /* 455 Trouble in factorization, should we dump the original matrix? 456 */ 457 #undef __FUNCT__ 458 #define __FUNCT__ "MatFactorDumpMatrix" 459 PetscErrorCode MatFactorDumpMatrix(Mat A) 460 { 461 PetscErrorCode ierr; 462 PetscTruth flg = PETSC_FALSE; 463 464 PetscFunctionBegin; 465 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 466 if (flg) { 467 PetscViewer viewer; 468 char filename[PETSC_MAX_PATH_LEN]; 469 470 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 471 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 472 ierr = MatView(A,viewer);CHKERRQ(ierr); 473 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 474 } 475 PetscFunctionReturn(0); 476 } 477 478 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec); 479 480 /* ----------------------------------------------------------- */ 481 #undef __FUNCT__ 482 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 483 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 484 { 485 Mat C=B; 486 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 487 IS isrow = b->row,isicol = b->icol; 488 PetscErrorCode ierr; 489 const PetscInt *r,*ic,*ics; 490 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 491 PetscInt *ajtmp,*bjtmp,nz,row,*diag_offset = b->diag,diag,*pj; 492 MatScalar *rtmp,*pc,multiplier,*v,*pv,d,*aa=a->a; 493 PetscReal rs; 494 LUShift_Ctx sctx; 495 PetscInt newshift,*ddiag; 496 497 PetscFunctionBegin; 498 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 499 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 500 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 501 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 502 ics = ic; 503 504 sctx.shift_top = 0; 505 sctx.nshift_max = 0; 506 sctx.shift_lo = 0; 507 sctx.shift_hi = 0; 508 sctx.shift_fraction = 0; 509 510 /* if both shift schemes are chosen by user, only use info->shiftpd */ 511 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 512 ddiag = a->diag; 513 sctx.shift_top = info->zeropivot; 514 for (i=0; i<n; i++) { 515 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 516 d = (aa)[ddiag[i]]; 517 rs = -PetscAbsScalar(d) - PetscRealPart(d); 518 v = aa+ai[i]; 519 nz = ai[i+1] - ai[i]; 520 for (j=0; j<nz; j++) 521 rs += PetscAbsScalar(v[j]); 522 if (rs>sctx.shift_top) sctx.shift_top = rs; 523 } 524 sctx.shift_top *= 1.1; 525 sctx.nshift_max = 5; 526 sctx.shift_lo = 0.; 527 sctx.shift_hi = 1.; 528 } 529 530 sctx.shift_amount = 0.0; 531 sctx.nshift = 0; 532 do { 533 sctx.lushift = PETSC_FALSE; 534 for (i=0; i<n; i++){ 535 nz = bi[i+1] - bi[i]; 536 bjtmp = bj + bi[i]; 537 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 538 539 /* load in initial (unfactored row) */ 540 nz = ai[r[i]+1] - ai[r[i]]; 541 ajtmp = aj + ai[r[i]]; 542 v = aa + ai[r[i]]; 543 for (j=0; j<nz; j++) { 544 rtmp[ics[ajtmp[j]]] = v[j]; 545 } 546 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 547 548 row = *bjtmp++; 549 while (row < i) { 550 pc = rtmp + row; 551 if (*pc != 0.0) { 552 pv = b->a + diag_offset[row]; 553 pj = b->j + diag_offset[row] + 1; 554 multiplier = *pc / *pv++; 555 *pc = multiplier; 556 nz = bi[row+1] - diag_offset[row] - 1; 557 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 558 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 559 } 560 row = *bjtmp++; 561 } 562 /* finished row so stick it into b->a */ 563 pv = b->a + bi[i] ; 564 pj = b->j + bi[i] ; 565 nz = bi[i+1] - bi[i]; 566 diag = diag_offset[i] - bi[i]; 567 rs = -PetscAbsScalar(pv[diag]); 568 for (j=0; j<nz; j++) { 569 pv[j] = rtmp[pj[j]]; 570 rs += PetscAbsScalar(pv[j]); 571 } 572 573 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 574 sctx.rs = rs; 575 sctx.pv = pv[diag]; 576 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 577 if (newshift == 1) break; 578 } 579 580 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 581 /* 582 * if no shift in this attempt & shifting & started shifting & can refine, 583 * then try lower shift 584 */ 585 sctx.shift_hi = sctx.shift_fraction; 586 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 587 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 588 sctx.lushift = PETSC_TRUE; 589 sctx.nshift++; 590 } 591 } while (sctx.lushift); 592 593 /* invert diagonal entries for simplier triangular solves */ 594 for (i=0; i<n; i++) { 595 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 596 } 597 ierr = PetscFree(rtmp);CHKERRQ(ierr); 598 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 599 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 600 if (b->inode.use) { 601 C->ops->solve = MatSolve_Inode; 602 } else { 603 PetscTruth row_identity, col_identity; 604 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 605 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 606 if (row_identity && col_identity) { 607 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 608 } else { 609 C->ops->solve = MatSolve_SeqAIJ; 610 } 611 } 612 C->ops->solveadd = MatSolveAdd_SeqAIJ; 613 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 614 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 615 C->ops->matsolve = MatMatSolve_SeqAIJ; 616 C->assembled = PETSC_TRUE; 617 C->preallocated = PETSC_TRUE; 618 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 619 if (sctx.nshift){ 620 if (info->shiftpd) { 621 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 622 } else if (info->shiftnz) { 623 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 624 } 625 } 626 PetscFunctionReturn(0); 627 } 628 629 /* 630 This routine implements inplace ILU(0) with row or/and column permutations. 631 Input: 632 A - original matrix 633 Output; 634 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 635 a->j (col index) is permuted by the inverse of colperm, then sorted 636 a->a reordered accordingly with a->j 637 a->diag (ptr to diagonal elements) is updated. 638 */ 639 #undef __FUNCT__ 640 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 641 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 642 { 643 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 644 IS isrow = a->row,isicol = a->icol; 645 PetscErrorCode ierr; 646 const PetscInt *r,*ic,*ics; 647 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 648 PetscInt *ajtmp,nz,row; 649 PetscInt *diag = a->diag,nbdiag,*pj; 650 PetscScalar *rtmp,*pc,multiplier,d; 651 MatScalar *v,*pv; 652 PetscReal rs; 653 LUShift_Ctx sctx; 654 PetscInt newshift; 655 656 PetscFunctionBegin; 657 if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 658 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 659 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 660 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 661 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 662 ics = ic; 663 664 sctx.shift_top = 0; 665 sctx.nshift_max = 0; 666 sctx.shift_lo = 0; 667 sctx.shift_hi = 0; 668 sctx.shift_fraction = 0; 669 670 /* if both shift schemes are chosen by user, only use info->shiftpd */ 671 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 672 sctx.shift_top = 0; 673 for (i=0; i<n; i++) { 674 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 675 d = (a->a)[diag[i]]; 676 rs = -PetscAbsScalar(d) - PetscRealPart(d); 677 v = a->a+ai[i]; 678 nz = ai[i+1] - ai[i]; 679 for (j=0; j<nz; j++) 680 rs += PetscAbsScalar(v[j]); 681 if (rs>sctx.shift_top) sctx.shift_top = rs; 682 } 683 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 684 sctx.shift_top *= 1.1; 685 sctx.nshift_max = 5; 686 sctx.shift_lo = 0.; 687 sctx.shift_hi = 1.; 688 } 689 690 sctx.shift_amount = 0; 691 sctx.nshift = 0; 692 do { 693 sctx.lushift = PETSC_FALSE; 694 for (i=0; i<n; i++){ 695 /* load in initial unfactored row */ 696 nz = ai[r[i]+1] - ai[r[i]]; 697 ajtmp = aj + ai[r[i]]; 698 v = a->a + ai[r[i]]; 699 /* sort permuted ajtmp and values v accordingly */ 700 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 701 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 702 703 diag[r[i]] = ai[r[i]]; 704 for (j=0; j<nz; j++) { 705 rtmp[ajtmp[j]] = v[j]; 706 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 707 } 708 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 709 710 row = *ajtmp++; 711 while (row < i) { 712 pc = rtmp + row; 713 if (*pc != 0.0) { 714 pv = a->a + diag[r[row]]; 715 pj = aj + diag[r[row]] + 1; 716 717 multiplier = *pc / *pv++; 718 *pc = multiplier; 719 nz = ai[r[row]+1] - diag[r[row]] - 1; 720 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 721 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 722 } 723 row = *ajtmp++; 724 } 725 /* finished row so overwrite it onto a->a */ 726 pv = a->a + ai[r[i]] ; 727 pj = aj + ai[r[i]] ; 728 nz = ai[r[i]+1] - ai[r[i]]; 729 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 730 731 rs = 0.0; 732 for (j=0; j<nz; j++) { 733 pv[j] = rtmp[pj[j]]; 734 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 735 } 736 737 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 738 sctx.rs = rs; 739 sctx.pv = pv[nbdiag]; 740 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 741 if (newshift == 1) break; 742 } 743 744 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 745 /* 746 * if no shift in this attempt & shifting & started shifting & can refine, 747 * then try lower shift 748 */ 749 sctx.shift_hi = sctx.shift_fraction; 750 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 751 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 752 sctx.lushift = PETSC_TRUE; 753 sctx.nshift++; 754 } 755 } while (sctx.lushift); 756 757 /* invert diagonal entries for simplier triangular solves */ 758 for (i=0; i<n; i++) { 759 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 760 } 761 762 ierr = PetscFree(rtmp);CHKERRQ(ierr); 763 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 764 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 765 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 766 A->ops->solveadd = MatSolveAdd_SeqAIJ; 767 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 768 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 769 A->assembled = PETSC_TRUE; 770 A->preallocated = PETSC_TRUE; 771 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 772 if (sctx.nshift){ 773 if (info->shiftpd) { 774 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 775 } else if (info->shiftnz) { 776 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 777 } 778 } 779 PetscFunctionReturn(0); 780 } 781 782 /* ----------------------------------------------------------- */ 783 #undef __FUNCT__ 784 #define __FUNCT__ "MatLUFactor_SeqAIJ" 785 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 786 { 787 PetscErrorCode ierr; 788 Mat C; 789 790 PetscFunctionBegin; 791 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 792 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 793 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 794 A->ops->solve = C->ops->solve; 795 A->ops->solvetranspose = C->ops->solvetranspose; 796 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 797 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 798 PetscFunctionReturn(0); 799 } 800 /* ----------------------------------------------------------- */ 801 #undef __FUNCT__ 802 #define __FUNCT__ "MatSolve_SeqAIJ" 803 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 804 { 805 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 806 IS iscol = a->col,isrow = a->row; 807 PetscErrorCode ierr; 808 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 809 PetscInt nz; 810 const PetscInt *rout,*cout,*r,*c; 811 PetscScalar *x,*tmp,*tmps,sum; 812 const PetscScalar *b; 813 const MatScalar *aa = a->a,*v; 814 815 PetscFunctionBegin; 816 if (!n) PetscFunctionReturn(0); 817 818 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 819 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 820 tmp = a->solve_work; 821 822 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 823 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 824 825 /* forward solve the lower triangular */ 826 tmp[0] = b[*r++]; 827 tmps = tmp; 828 for (i=1; i<n; i++) { 829 v = aa + ai[i] ; 830 vi = aj + ai[i] ; 831 nz = a->diag[i] - ai[i]; 832 sum = b[*r++]; 833 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 834 tmp[i] = sum; 835 } 836 837 /* backward solve the upper triangular */ 838 for (i=n-1; i>=0; i--){ 839 v = aa + a->diag[i] + 1; 840 vi = aj + a->diag[i] + 1; 841 nz = ai[i+1] - a->diag[i] - 1; 842 sum = tmp[i]; 843 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 844 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 845 } 846 847 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 848 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 849 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 850 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 851 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 852 PetscFunctionReturn(0); 853 } 854 855 #undef __FUNCT__ 856 #define __FUNCT__ "MatMatSolve_SeqAIJ" 857 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 858 { 859 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 860 IS iscol = a->col,isrow = a->row; 861 PetscErrorCode ierr; 862 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 863 PetscInt nz,neq; 864 const PetscInt *rout,*cout,*r,*c; 865 PetscScalar *x,*b,*tmp,*tmps,sum; 866 const MatScalar *aa = a->a,*v; 867 PetscTruth bisdense,xisdense; 868 869 PetscFunctionBegin; 870 if (!n) PetscFunctionReturn(0); 871 872 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 873 if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 874 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 875 if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 876 877 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 878 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 879 880 tmp = a->solve_work; 881 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 882 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 883 884 for (neq=0; neq<B->cmap->n; neq++){ 885 /* forward solve the lower triangular */ 886 tmp[0] = b[r[0]]; 887 tmps = tmp; 888 for (i=1; i<n; i++) { 889 v = aa + ai[i] ; 890 vi = aj + ai[i] ; 891 nz = a->diag[i] - ai[i]; 892 sum = b[r[i]]; 893 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 894 tmp[i] = sum; 895 } 896 /* backward solve the upper triangular */ 897 for (i=n-1; i>=0; i--){ 898 v = aa + a->diag[i] + 1; 899 vi = aj + a->diag[i] + 1; 900 nz = ai[i+1] - a->diag[i] - 1; 901 sum = tmp[i]; 902 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 903 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 904 } 905 906 b += n; 907 x += n; 908 } 909 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 910 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 911 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 912 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 913 ierr = PetscLogFlops(B->cmap->n*(2*a->nz - n));CHKERRQ(ierr); 914 PetscFunctionReturn(0); 915 } 916 917 #undef __FUNCT__ 918 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 919 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 920 { 921 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 922 IS iscol = a->col,isrow = a->row; 923 PetscErrorCode ierr; 924 const PetscInt *r,*c,*rout,*cout; 925 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 926 PetscInt nz,row; 927 PetscScalar *x,*b,*tmp,*tmps,sum; 928 const MatScalar *aa = a->a,*v; 929 930 PetscFunctionBegin; 931 if (!n) PetscFunctionReturn(0); 932 933 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 934 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 935 tmp = a->solve_work; 936 937 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 938 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 939 940 /* forward solve the lower triangular */ 941 tmp[0] = b[*r++]; 942 tmps = tmp; 943 for (row=1; row<n; row++) { 944 i = rout[row]; /* permuted row */ 945 v = aa + ai[i] ; 946 vi = aj + ai[i] ; 947 nz = a->diag[i] - ai[i]; 948 sum = b[*r++]; 949 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 950 tmp[row] = sum; 951 } 952 953 /* backward solve the upper triangular */ 954 for (row=n-1; row>=0; row--){ 955 i = rout[row]; /* permuted row */ 956 v = aa + a->diag[i] + 1; 957 vi = aj + a->diag[i] + 1; 958 nz = ai[i+1] - a->diag[i] - 1; 959 sum = tmp[row]; 960 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 961 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 962 } 963 964 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 965 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 966 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 967 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 968 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 /* ----------------------------------------------------------- */ 973 #undef __FUNCT__ 974 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 975 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 976 { 977 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 978 PetscErrorCode ierr; 979 PetscInt n = A->rmap->n; 980 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 981 PetscScalar *x; 982 const PetscScalar *b; 983 const MatScalar *aa = a->a; 984 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 985 PetscInt adiag_i,i,nz,ai_i; 986 const MatScalar *v; 987 PetscScalar sum; 988 #endif 989 990 PetscFunctionBegin; 991 if (!n) PetscFunctionReturn(0); 992 993 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 994 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 995 996 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 997 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 998 #else 999 /* forward solve the lower triangular */ 1000 x[0] = b[0]; 1001 for (i=1; i<n; i++) { 1002 ai_i = ai[i]; 1003 v = aa + ai_i; 1004 vi = aj + ai_i; 1005 nz = adiag[i] - ai_i; 1006 sum = b[i]; 1007 while (nz--) sum -= *v++ * x[*vi++]; 1008 x[i] = sum; 1009 } 1010 1011 /* backward solve the upper triangular */ 1012 for (i=n-1; i>=0; i--){ 1013 adiag_i = adiag[i]; 1014 v = aa + adiag_i + 1; 1015 vi = aj + adiag_i + 1; 1016 nz = ai[i+1] - adiag_i - 1; 1017 sum = x[i]; 1018 while (nz--) sum -= *v++ * x[*vi++]; 1019 x[i] = sum*aa[adiag_i]; 1020 } 1021 #endif 1022 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 1023 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1024 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1025 PetscFunctionReturn(0); 1026 } 1027 1028 #undef __FUNCT__ 1029 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1030 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1031 { 1032 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1033 IS iscol = a->col,isrow = a->row; 1034 PetscErrorCode ierr; 1035 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1036 PetscInt nz; 1037 const PetscInt *rout,*cout,*r,*c; 1038 PetscScalar *x,*b,*tmp,sum; 1039 const MatScalar *aa = a->a,*v; 1040 1041 PetscFunctionBegin; 1042 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1043 1044 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1045 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1046 tmp = a->solve_work; 1047 1048 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1049 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1050 1051 /* forward solve the lower triangular */ 1052 tmp[0] = b[*r++]; 1053 for (i=1; i<n; i++) { 1054 v = aa + ai[i] ; 1055 vi = aj + ai[i] ; 1056 nz = a->diag[i] - ai[i]; 1057 sum = b[*r++]; 1058 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1059 tmp[i] = sum; 1060 } 1061 1062 /* backward solve the upper triangular */ 1063 for (i=n-1; i>=0; i--){ 1064 v = aa + a->diag[i] + 1; 1065 vi = aj + a->diag[i] + 1; 1066 nz = ai[i+1] - a->diag[i] - 1; 1067 sum = tmp[i]; 1068 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1069 tmp[i] = sum*aa[a->diag[i]]; 1070 x[*c--] += tmp[i]; 1071 } 1072 1073 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1074 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1075 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1076 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1077 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1078 1079 PetscFunctionReturn(0); 1080 } 1081 /* -------------------------------------------------------------------*/ 1082 #undef __FUNCT__ 1083 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1084 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1085 { 1086 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1087 IS iscol = a->col,isrow = a->row; 1088 PetscErrorCode ierr; 1089 const PetscInt *rout,*cout,*r,*c; 1090 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1091 PetscInt nz,*diag = a->diag; 1092 PetscScalar *x,*b,*tmp,s1; 1093 const MatScalar *aa = a->a,*v; 1094 1095 PetscFunctionBegin; 1096 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1097 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1098 tmp = a->solve_work; 1099 1100 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1101 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1102 1103 /* copy the b into temp work space according to permutation */ 1104 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1105 1106 /* forward solve the U^T */ 1107 for (i=0; i<n; i++) { 1108 v = aa + diag[i] ; 1109 vi = aj + diag[i] + 1; 1110 nz = ai[i+1] - diag[i] - 1; 1111 s1 = tmp[i]; 1112 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1113 while (nz--) { 1114 tmp[*vi++ ] -= (*v++)*s1; 1115 } 1116 tmp[i] = s1; 1117 } 1118 1119 /* backward solve the L^T */ 1120 for (i=n-1; i>=0; i--){ 1121 v = aa + diag[i] - 1 ; 1122 vi = aj + diag[i] - 1 ; 1123 nz = diag[i] - ai[i]; 1124 s1 = tmp[i]; 1125 while (nz--) { 1126 tmp[*vi-- ] -= (*v--)*s1; 1127 } 1128 } 1129 1130 /* copy tmp into x according to permutation */ 1131 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1132 1133 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1134 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1135 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1136 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1137 1138 ierr = PetscLogFlops(2*a->nz-A->cmap->n);CHKERRQ(ierr); 1139 PetscFunctionReturn(0); 1140 } 1141 1142 #undef __FUNCT__ 1143 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1144 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1145 { 1146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1147 IS iscol = a->col,isrow = a->row; 1148 PetscErrorCode ierr; 1149 const PetscInt *r,*c,*rout,*cout; 1150 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1151 PetscInt nz,*diag = a->diag; 1152 PetscScalar *x,*b,*tmp; 1153 const MatScalar *aa = a->a,*v; 1154 1155 PetscFunctionBegin; 1156 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1157 1158 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1159 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1160 tmp = a->solve_work; 1161 1162 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1163 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1164 1165 /* copy the b into temp work space according to permutation */ 1166 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1167 1168 /* forward solve the U^T */ 1169 for (i=0; i<n; i++) { 1170 v = aa + diag[i] ; 1171 vi = aj + diag[i] + 1; 1172 nz = ai[i+1] - diag[i] - 1; 1173 tmp[i] *= *v++; 1174 while (nz--) { 1175 tmp[*vi++ ] -= (*v++)*tmp[i]; 1176 } 1177 } 1178 1179 /* backward solve the L^T */ 1180 for (i=n-1; i>=0; i--){ 1181 v = aa + diag[i] - 1 ; 1182 vi = aj + diag[i] - 1 ; 1183 nz = diag[i] - ai[i]; 1184 while (nz--) { 1185 tmp[*vi-- ] -= (*v--)*tmp[i]; 1186 } 1187 } 1188 1189 /* copy tmp into x according to permutation */ 1190 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1191 1192 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1193 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1194 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1195 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1196 1197 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1198 PetscFunctionReturn(0); 1199 } 1200 /* ----------------------------------------------------------------*/ 1201 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1202 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption); 1203 1204 #undef __FUNCT__ 1205 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1206 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1207 { 1208 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1209 IS isicol; 1210 PetscErrorCode ierr; 1211 const PetscInt *r,*ic; 1212 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1213 PetscInt *bi,*cols,nnz,*cols_lvl; 1214 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1215 PetscInt i,levels,diagonal_fill; 1216 PetscTruth col_identity,row_identity; 1217 PetscReal f; 1218 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1219 PetscBT lnkbt; 1220 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1221 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1222 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1223 PetscTruth missing; 1224 1225 PetscFunctionBegin; 1226 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1227 f = info->fill; 1228 levels = (PetscInt)info->levels; 1229 diagonal_fill = (PetscInt)info->diagonal_fill; 1230 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1231 1232 /* special case that simply copies fill pattern */ 1233 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1234 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1235 if (!levels && row_identity && col_identity) { 1236 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr); 1237 fact->factor = MAT_FACTOR_ILU; 1238 (fact)->info.factor_mallocs = 0; 1239 (fact)->info.fill_ratio_given = info->fill; 1240 (fact)->info.fill_ratio_needed = 1.0; 1241 b = (Mat_SeqAIJ*)(fact)->data; 1242 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1243 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1244 b->row = isrow; 1245 b->col = iscol; 1246 b->icol = isicol; 1247 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1248 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1249 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1250 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1251 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1256 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1257 1258 /* get new row pointers */ 1259 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1260 bi[0] = 0; 1261 /* bdiag is location of diagonal in factor */ 1262 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1263 bdiag[0] = 0; 1264 1265 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1266 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1267 1268 /* create a linked list for storing column indices of the active row */ 1269 nlnk = n + 1; 1270 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1271 1272 /* initial FreeSpace size is f*(ai[n]+1) */ 1273 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1274 current_space = free_space; 1275 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1276 current_space_lvl = free_space_lvl; 1277 1278 for (i=0; i<n; i++) { 1279 nzi = 0; 1280 /* copy current row into linked list */ 1281 nnz = ai[r[i]+1] - ai[r[i]]; 1282 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1283 cols = aj + ai[r[i]]; 1284 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1285 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1286 nzi += nlnk; 1287 1288 /* make sure diagonal entry is included */ 1289 if (diagonal_fill && lnk[i] == -1) { 1290 fm = n; 1291 while (lnk[fm] < i) fm = lnk[fm]; 1292 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1293 lnk[fm] = i; 1294 lnk_lvl[i] = 0; 1295 nzi++; dcount++; 1296 } 1297 1298 /* add pivot rows into the active row */ 1299 nzbd = 0; 1300 prow = lnk[n]; 1301 while (prow < i) { 1302 nnz = bdiag[prow]; 1303 cols = bj_ptr[prow] + nnz + 1; 1304 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1305 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1306 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1307 nzi += nlnk; 1308 prow = lnk[prow]; 1309 nzbd++; 1310 } 1311 bdiag[i] = nzbd; 1312 bi[i+1] = bi[i] + nzi; 1313 1314 /* if free space is not available, make more free space */ 1315 if (current_space->local_remaining<nzi) { 1316 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1317 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1318 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1319 reallocs++; 1320 } 1321 1322 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1323 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1324 bj_ptr[i] = current_space->array; 1325 bjlvl_ptr[i] = current_space_lvl->array; 1326 1327 /* make sure the active row i has diagonal entry */ 1328 if (*(bj_ptr[i]+bdiag[i]) != i) { 1329 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1330 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1331 } 1332 1333 current_space->array += nzi; 1334 current_space->local_used += nzi; 1335 current_space->local_remaining -= nzi; 1336 current_space_lvl->array += nzi; 1337 current_space_lvl->local_used += nzi; 1338 current_space_lvl->local_remaining -= nzi; 1339 } 1340 1341 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1342 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1343 1344 /* destroy list of free space and other temporary arrays */ 1345 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1346 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1347 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1348 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1349 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1350 1351 #if defined(PETSC_USE_INFO) 1352 { 1353 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1354 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1355 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1356 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1357 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1358 if (diagonal_fill) { 1359 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1360 } 1361 } 1362 #endif 1363 1364 /* put together the new matrix */ 1365 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1366 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1367 b = (Mat_SeqAIJ*)(fact)->data; 1368 b->free_a = PETSC_TRUE; 1369 b->free_ij = PETSC_TRUE; 1370 b->singlemalloc = PETSC_FALSE; 1371 ierr = PetscMalloc( (bi[n] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1372 b->j = bj; 1373 b->i = bi; 1374 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1375 b->diag = bdiag; 1376 b->ilen = 0; 1377 b->imax = 0; 1378 b->row = isrow; 1379 b->col = iscol; 1380 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1381 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1382 b->icol = isicol; 1383 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1384 /* In b structure: Free imax, ilen, old a, old j. 1385 Allocate bdiag, solve_work, new a, new j */ 1386 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1387 b->maxnz = b->nz = bi[n] ; 1388 (fact)->info.factor_mallocs = reallocs; 1389 (fact)->info.fill_ratio_given = f; 1390 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1391 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1392 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1393 PetscFunctionReturn(0); 1394 } 1395 1396 #include "../src/mat/impls/sbaij/seq/sbaij.h" 1397 #undef __FUNCT__ 1398 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1399 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 1400 { 1401 Mat C = B; 1402 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1403 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1404 IS ip=b->row,iip = b->icol; 1405 PetscErrorCode ierr; 1406 const PetscInt *rip,*riip; 1407 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol; 1408 PetscInt *ai=a->i,*aj=a->j; 1409 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1410 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1411 PetscReal zeropivot,rs,shiftnz; 1412 PetscReal shiftpd; 1413 ChShift_Ctx sctx; 1414 PetscInt newshift; 1415 PetscTruth perm_identity; 1416 1417 PetscFunctionBegin; 1418 1419 shiftnz = info->shiftnz; 1420 shiftpd = info->shiftpd; 1421 zeropivot = info->zeropivot; 1422 1423 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1424 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1425 1426 /* initialization */ 1427 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1428 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1429 jl = il + mbs; 1430 rtmp = (MatScalar*)(jl + mbs); 1431 1432 sctx.shift_amount = 0; 1433 sctx.nshift = 0; 1434 do { 1435 sctx.chshift = PETSC_FALSE; 1436 for (i=0; i<mbs; i++) { 1437 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1438 } 1439 1440 for (k = 0; k<mbs; k++){ 1441 bval = ba + bi[k]; 1442 /* initialize k-th row by the perm[k]-th row of A */ 1443 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1444 for (j = jmin; j < jmax; j++){ 1445 col = riip[aj[j]]; 1446 if (col >= k){ /* only take upper triangular entry */ 1447 rtmp[col] = aa[j]; 1448 *bval++ = 0.0; /* for in-place factorization */ 1449 } 1450 } 1451 /* shift the diagonal of the matrix */ 1452 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1453 1454 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1455 dk = rtmp[k]; 1456 i = jl[k]; /* first row to be added to k_th row */ 1457 1458 while (i < k){ 1459 nexti = jl[i]; /* next row to be added to k_th row */ 1460 1461 /* compute multiplier, update diag(k) and U(i,k) */ 1462 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1463 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1464 dk += uikdi*ba[ili]; 1465 ba[ili] = uikdi; /* -U(i,k) */ 1466 1467 /* add multiple of row i to k-th row */ 1468 jmin = ili + 1; jmax = bi[i+1]; 1469 if (jmin < jmax){ 1470 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1471 /* update il and jl for row i */ 1472 il[i] = jmin; 1473 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1474 } 1475 i = nexti; 1476 } 1477 1478 /* shift the diagonals when zero pivot is detected */ 1479 /* compute rs=sum of abs(off-diagonal) */ 1480 rs = 0.0; 1481 jmin = bi[k]+1; 1482 nz = bi[k+1] - jmin; 1483 bcol = bj + jmin; 1484 while (nz--){ 1485 rs += PetscAbsScalar(rtmp[*bcol]); 1486 bcol++; 1487 } 1488 1489 sctx.rs = rs; 1490 sctx.pv = dk; 1491 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1492 1493 if (newshift == 1) { 1494 if (!sctx.shift_amount) { 1495 sctx.shift_amount = 1e-5; 1496 } 1497 break; 1498 } 1499 1500 /* copy data into U(k,:) */ 1501 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1502 jmin = bi[k]+1; jmax = bi[k+1]; 1503 if (jmin < jmax) { 1504 for (j=jmin; j<jmax; j++){ 1505 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1506 } 1507 /* add the k-th row into il and jl */ 1508 il[k] = jmin; 1509 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1510 } 1511 } 1512 } while (sctx.chshift); 1513 ierr = PetscFree(il);CHKERRQ(ierr); 1514 1515 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1516 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1517 1518 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 1519 if (perm_identity){ 1520 (B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1521 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1522 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1523 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1524 } else { 1525 (B)->ops->solve = MatSolve_SeqSBAIJ_1; 1526 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1527 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1528 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1529 } 1530 1531 C->assembled = PETSC_TRUE; 1532 C->preallocated = PETSC_TRUE; 1533 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 1534 if (sctx.nshift){ 1535 if (shiftnz) { 1536 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1537 } else if (shiftpd) { 1538 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1539 } 1540 } 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1546 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1547 { 1548 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1549 Mat_SeqSBAIJ *b; 1550 PetscErrorCode ierr; 1551 PetscTruth perm_identity,missing; 1552 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui; 1553 const PetscInt *rip,*riip; 1554 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1555 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1556 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1557 PetscReal fill=info->fill,levels=info->levels; 1558 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1559 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1560 PetscBT lnkbt; 1561 IS iperm; 1562 1563 PetscFunctionBegin; 1564 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1565 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1566 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1567 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1568 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1569 1570 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1571 ui[0] = 0; 1572 1573 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1574 if (!levels && perm_identity) { 1575 1576 for (i=0; i<am; i++) { 1577 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1578 } 1579 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1580 cols = uj; 1581 for (i=0; i<am; i++) { 1582 aj = a->j + a->diag[i]; 1583 ncols = ui[i+1] - ui[i]; 1584 for (j=0; j<ncols; j++) *cols++ = *aj++; 1585 } 1586 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1587 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1588 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1589 1590 /* initialization */ 1591 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1592 1593 /* jl: linked list for storing indices of the pivot rows 1594 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1595 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1596 il = jl + am; 1597 uj_ptr = (PetscInt**)(il + am); 1598 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1599 for (i=0; i<am; i++){ 1600 jl[i] = am; il[i] = 0; 1601 } 1602 1603 /* create and initialize a linked list for storing column indices of the active row k */ 1604 nlnk = am + 1; 1605 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1606 1607 /* initial FreeSpace size is fill*(ai[am]+1) */ 1608 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1609 current_space = free_space; 1610 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1611 current_space_lvl = free_space_lvl; 1612 1613 for (k=0; k<am; k++){ /* for each active row k */ 1614 /* initialize lnk by the column indices of row rip[k] of A */ 1615 nzk = 0; 1616 ncols = ai[rip[k]+1] - ai[rip[k]]; 1617 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 1618 ncols_upper = 0; 1619 for (j=0; j<ncols; j++){ 1620 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1621 if (riip[i] >= k){ /* only take upper triangular entry */ 1622 ajtmp[ncols_upper] = i; 1623 ncols_upper++; 1624 } 1625 } 1626 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1627 nzk += nlnk; 1628 1629 /* update lnk by computing fill-in for each pivot row to be merged in */ 1630 prow = jl[k]; /* 1st pivot row */ 1631 1632 while (prow < k){ 1633 nextprow = jl[prow]; 1634 1635 /* merge prow into k-th row */ 1636 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1637 jmax = ui[prow+1]; 1638 ncols = jmax-jmin; 1639 i = jmin - ui[prow]; 1640 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1641 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1642 j = *(uj - 1); 1643 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1644 nzk += nlnk; 1645 1646 /* update il and jl for prow */ 1647 if (jmin < jmax){ 1648 il[prow] = jmin; 1649 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1650 } 1651 prow = nextprow; 1652 } 1653 1654 /* if free space is not available, make more free space */ 1655 if (current_space->local_remaining<nzk) { 1656 i = am - k + 1; /* num of unfactored rows */ 1657 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1658 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1659 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1660 reallocs++; 1661 } 1662 1663 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1664 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1665 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1666 1667 /* add the k-th row into il and jl */ 1668 if (nzk > 1){ 1669 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1670 jl[k] = jl[i]; jl[i] = k; 1671 il[k] = ui[k] + 1; 1672 } 1673 uj_ptr[k] = current_space->array; 1674 uj_lvl_ptr[k] = current_space_lvl->array; 1675 1676 current_space->array += nzk; 1677 current_space->local_used += nzk; 1678 current_space->local_remaining -= nzk; 1679 1680 current_space_lvl->array += nzk; 1681 current_space_lvl->local_used += nzk; 1682 current_space_lvl->local_remaining -= nzk; 1683 1684 ui[k+1] = ui[k] + nzk; 1685 } 1686 1687 #if defined(PETSC_USE_INFO) 1688 if (ai[am] != 0) { 1689 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1690 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1691 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1692 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1693 } else { 1694 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1695 } 1696 #endif 1697 1698 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1699 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1700 ierr = PetscFree(jl);CHKERRQ(ierr); 1701 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1702 1703 /* destroy list of free space and other temporary array(s) */ 1704 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1705 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1706 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1707 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1708 1709 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1710 1711 /* put together the new matrix in MATSEQSBAIJ format */ 1712 1713 b = (Mat_SeqSBAIJ*)(fact)->data; 1714 b->singlemalloc = PETSC_FALSE; 1715 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1716 b->j = uj; 1717 b->i = ui; 1718 b->diag = 0; 1719 b->ilen = 0; 1720 b->imax = 0; 1721 b->row = perm; 1722 b->col = perm; 1723 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1724 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1725 b->icol = iperm; 1726 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1727 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1728 ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1729 b->maxnz = b->nz = ui[am]; 1730 b->free_a = PETSC_TRUE; 1731 b->free_ij = PETSC_TRUE; 1732 1733 (fact)->info.factor_mallocs = reallocs; 1734 (fact)->info.fill_ratio_given = fill; 1735 if (ai[am] != 0) { 1736 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1737 } else { 1738 (fact)->info.fill_ratio_needed = 0.0; 1739 } 1740 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1741 PetscFunctionReturn(0); 1742 } 1743 1744 #undef __FUNCT__ 1745 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1746 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1747 { 1748 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1749 Mat_SeqSBAIJ *b; 1750 PetscErrorCode ierr; 1751 PetscTruth perm_identity; 1752 PetscReal fill = info->fill; 1753 const PetscInt *rip,*riip; 1754 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1755 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1756 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1757 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1758 PetscBT lnkbt; 1759 IS iperm; 1760 1761 PetscFunctionBegin; 1762 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1763 /* check whether perm is the identity mapping */ 1764 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1765 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1766 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1767 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1768 1769 /* initialization */ 1770 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1771 ui[0] = 0; 1772 1773 /* jl: linked list for storing indices of the pivot rows 1774 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1775 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1776 il = jl + am; 1777 cols = il + am; 1778 ui_ptr = (PetscInt**)(cols + am); 1779 for (i=0; i<am; i++){ 1780 jl[i] = am; il[i] = 0; 1781 } 1782 1783 /* create and initialize a linked list for storing column indices of the active row k */ 1784 nlnk = am + 1; 1785 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1786 1787 /* initial FreeSpace size is fill*(ai[am]+1) */ 1788 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1789 current_space = free_space; 1790 1791 for (k=0; k<am; k++){ /* for each active row k */ 1792 /* initialize lnk by the column indices of row rip[k] of A */ 1793 nzk = 0; 1794 ncols = ai[rip[k]+1] - ai[rip[k]]; 1795 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 1796 ncols_upper = 0; 1797 for (j=0; j<ncols; j++){ 1798 i = riip[*(aj + ai[rip[k]] + j)]; 1799 if (i >= k){ /* only take upper triangular entry */ 1800 cols[ncols_upper] = i; 1801 ncols_upper++; 1802 } 1803 } 1804 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1805 nzk += nlnk; 1806 1807 /* update lnk by computing fill-in for each pivot row to be merged in */ 1808 prow = jl[k]; /* 1st pivot row */ 1809 1810 while (prow < k){ 1811 nextprow = jl[prow]; 1812 /* merge prow into k-th row */ 1813 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1814 jmax = ui[prow+1]; 1815 ncols = jmax-jmin; 1816 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1817 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1818 nzk += nlnk; 1819 1820 /* update il and jl for prow */ 1821 if (jmin < jmax){ 1822 il[prow] = jmin; 1823 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1824 } 1825 prow = nextprow; 1826 } 1827 1828 /* if free space is not available, make more free space */ 1829 if (current_space->local_remaining<nzk) { 1830 i = am - k + 1; /* num of unfactored rows */ 1831 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1832 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1833 reallocs++; 1834 } 1835 1836 /* copy data into free space, then initialize lnk */ 1837 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1838 1839 /* add the k-th row into il and jl */ 1840 if (nzk-1 > 0){ 1841 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1842 jl[k] = jl[i]; jl[i] = k; 1843 il[k] = ui[k] + 1; 1844 } 1845 ui_ptr[k] = current_space->array; 1846 current_space->array += nzk; 1847 current_space->local_used += nzk; 1848 current_space->local_remaining -= nzk; 1849 1850 ui[k+1] = ui[k] + nzk; 1851 } 1852 1853 #if defined(PETSC_USE_INFO) 1854 if (ai[am] != 0) { 1855 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1856 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1857 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1858 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1859 } else { 1860 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1861 } 1862 #endif 1863 1864 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1865 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1866 ierr = PetscFree(jl);CHKERRQ(ierr); 1867 1868 /* destroy list of free space and other temporary array(s) */ 1869 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1870 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1871 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1872 1873 /* put together the new matrix in MATSEQSBAIJ format */ 1874 1875 b = (Mat_SeqSBAIJ*)(fact)->data; 1876 b->singlemalloc = PETSC_FALSE; 1877 b->free_a = PETSC_TRUE; 1878 b->free_ij = PETSC_TRUE; 1879 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1880 b->j = uj; 1881 b->i = ui; 1882 b->diag = 0; 1883 b->ilen = 0; 1884 b->imax = 0; 1885 b->row = perm; 1886 b->col = perm; 1887 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1888 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1889 b->icol = iperm; 1890 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1891 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1892 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1893 b->maxnz = b->nz = ui[am]; 1894 1895 (fact)->info.factor_mallocs = reallocs; 1896 (fact)->info.fill_ratio_given = fill; 1897 if (ai[am] != 0) { 1898 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1899 } else { 1900 (fact)->info.fill_ratio_needed = 0.0; 1901 } 1902 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1903 PetscFunctionReturn(0); 1904 } 1905